From 16469de0683a7fca7a14224afef145c13e711782 Mon Sep 17 00:00:00 2001 From: Andreas Knuth Date: Thu, 12 Feb 2026 17:03:00 -0600 Subject: [PATCH] new node.js impl., removed old stuff --- deprecated_worker/Dockerfile | 26 - deprecated_worker/docker-compose.yml | 53 -- deprecated_worker/manage-worker.sh | 30 - deprecated_worker/update-all-workers.sh | 19 - deprecated_worker/worker.py | 885 ------------------ deprecated_worker/worker.py.old | 520 ----------- email-worker-nodejs/.env.example | 38 + email-worker-nodejs/Dockerfile | 34 + email-worker-nodejs/blocklist.ts | 62 ++ email-worker-nodejs/bounce-handler.ts | 190 ++++ email-worker-nodejs/config.ts | 115 +++ email-worker-nodejs/delivery.ts | 154 +++ email-worker-nodejs/docker-compose.yml | 23 + email-worker-nodejs/domain-poller.ts | 151 +++ email-worker-nodejs/dynamodb.ts | 230 +++++ email-worker-nodejs/health.ts | 48 + email-worker-nodejs/logger.ts | 98 ++ email-worker-nodejs/main.ts | 89 ++ email-worker-nodejs/message-processor.ts | 328 +++++++ email-worker-nodejs/metrics.ts | 155 ++++ email-worker-nodejs/package.json | 37 + email-worker-nodejs/parser.ts | 120 +++ email-worker-nodejs/rules-processor.ts | 413 +++++++++ email-worker-nodejs/s3.ts | 202 ++++ email-worker-nodejs/ses.ts | 52 ++ email-worker-nodejs/sqs.ts | 99 ++ email-worker-nodejs/tsconfig.json | 22 + email-worker-nodejs/unified-worker.ts | 102 ++ unified-worker/Dockerfile | 33 - unified-worker/docker-compose.yml | 68 -- unified-worker/domains.txt | 14 - unified-worker/requirements.txt | 2 - unified-worker/unified_worker.py | 1081 ---------------------- 33 files changed, 2762 insertions(+), 2731 deletions(-) delete mode 100644 deprecated_worker/Dockerfile delete mode 100644 deprecated_worker/docker-compose.yml delete mode 100644 deprecated_worker/manage-worker.sh delete mode 100755 deprecated_worker/update-all-workers.sh delete mode 100755 deprecated_worker/worker.py delete mode 100644 deprecated_worker/worker.py.old create mode 100644 email-worker-nodejs/.env.example create mode 100644 email-worker-nodejs/Dockerfile create mode 100644 email-worker-nodejs/blocklist.ts create mode 100644 email-worker-nodejs/bounce-handler.ts create mode 100644 email-worker-nodejs/config.ts create mode 100644 email-worker-nodejs/delivery.ts create mode 100644 email-worker-nodejs/docker-compose.yml create mode 100644 email-worker-nodejs/domain-poller.ts create mode 100644 email-worker-nodejs/dynamodb.ts create mode 100644 email-worker-nodejs/health.ts create mode 100644 email-worker-nodejs/logger.ts create mode 100644 email-worker-nodejs/main.ts create mode 100644 email-worker-nodejs/message-processor.ts create mode 100644 email-worker-nodejs/metrics.ts create mode 100644 email-worker-nodejs/package.json create mode 100644 email-worker-nodejs/parser.ts create mode 100644 email-worker-nodejs/rules-processor.ts create mode 100644 email-worker-nodejs/s3.ts create mode 100644 email-worker-nodejs/ses.ts create mode 100644 email-worker-nodejs/sqs.ts create mode 100644 email-worker-nodejs/tsconfig.json create mode 100644 email-worker-nodejs/unified-worker.ts delete mode 100644 unified-worker/Dockerfile delete mode 100644 unified-worker/docker-compose.yml delete mode 100644 unified-worker/domains.txt delete mode 100644 unified-worker/requirements.txt delete mode 100644 unified-worker/unified_worker.py diff --git a/deprecated_worker/Dockerfile b/deprecated_worker/Dockerfile deleted file mode 100644 index 62fda1c..0000000 --- a/deprecated_worker/Dockerfile +++ /dev/null @@ -1,26 +0,0 @@ -FROM python:3.11-slim - -# Metadata -LABEL maintainer="your-email@example.com" -LABEL description="Domain-specific email worker for SMTP delivery" - -# Non-root user für Security -RUN useradd -m -u 1000 worker && \ - mkdir -p /app && \ - chown -R worker:worker /app - -# Boto3 installieren -RUN pip install --no-cache-dir boto3 - -# Worker Code -COPY --chown=worker:worker worker.py /app/worker.py - -WORKDIR /app -USER worker - -# Healthcheck -HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \ - CMD pgrep -f worker.py || exit 1 - -# Start worker mit unbuffered output -CMD ["python", "-u", "worker.py"] \ No newline at end of file diff --git a/deprecated_worker/docker-compose.yml b/deprecated_worker/docker-compose.yml deleted file mode 100644 index dc98231..0000000 --- a/deprecated_worker/docker-compose.yml +++ /dev/null @@ -1,53 +0,0 @@ -services: - worker: - image: python:3.11-slim - container_name: email-worker-${WORKER_DOMAIN} - restart: unless-stopped - network_mode: host # Zugriff auf lokales Netzwerk für Postfix - - # Worker-Code mounten - volumes: - - ./worker.py:/app/worker.py:ro - - working_dir: /app - - # Python Dependencies installieren und Worker starten - command: > - sh -c "apt-get update && - apt-get install -y --no-install-recommends procps && - rm -rf /var/lib/apt/lists/* && - pip install --no-cache-dir boto3 && - python -u worker.py" - - environment: - # ⚠️ WICHTIG: WORKER_DOMAIN muss von außen gesetzt werden! - - WORKER_DOMAIN=${WORKER_DOMAIN} - - # AWS Credentials - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - - # Worker Settings - - POLL_INTERVAL=${POLL_INTERVAL:-20} - - MAX_MESSAGES=${MAX_MESSAGES:-10} - - VISIBILITY_TIMEOUT=${VISIBILITY_TIMEOUT:-300} - - # SMTP Configuration - - SMTP_HOST=${SMTP_HOST:-localhost} - - SMTP_PORT=${SMTP_PORT:-25} - - SMTP_USE_TLS=${SMTP_USE_TLS:-false} - - SMTP_USER=${SMTP_USER:-} - - SMTP_PASS=${SMTP_PASS:-} - - logging: - driver: "json-file" - options: - max-size: "10m" - max-file: "5" - - healthcheck: - test: ["CMD", "pgrep", "-f", "worker.py"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 10s \ No newline at end of file diff --git a/deprecated_worker/manage-worker.sh b/deprecated_worker/manage-worker.sh deleted file mode 100644 index 7d6a52b..0000000 --- a/deprecated_worker/manage-worker.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# manage-worker.sh - -DOMAIN=$1 - -if [ -z "$DOMAIN" ]; then - echo "Usage: $0 [action]" - echo "Example: $0 andreasknuth.de" - echo " $0 andreasknuth.de down" - echo " $0 andreasknuth.de logs -f" - exit 1 -fi - -# Entfernt den ersten Parameter ($1 / DOMAIN) aus der Argumentenliste -shift - -# Nimm ALLE verbleibenden Argumente ($@). Wenn keine da sind, nimm "up -d". -ACTION="${@:-up -d}" - -PROJECT_NAME="${DOMAIN//./-}" -ENV_FILE=".env.${DOMAIN}" - -if [ ! -f "$ENV_FILE" ]; then - echo "Error: $ENV_FILE not found!" - exit 1 -fi - -# $ACTION wird hier nicht in Anführungszeichen gesetzt, -# damit "logs -f" als zwei separate Befehle erkannt wird. -docker compose -p "$PROJECT_NAME" --env-file "$ENV_FILE" $ACTION \ No newline at end of file diff --git a/deprecated_worker/update-all-workers.sh b/deprecated_worker/update-all-workers.sh deleted file mode 100755 index 88f3a47..0000000 --- a/deprecated_worker/update-all-workers.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# update-all-workers.sh (smart version) - -DOMAINS=$(docker ps --filter "name=email-worker-" --format "{{.Names}}" | sed 's/email-worker-//') - -if [ -z "$DOMAINS" ]; then - echo "No workers found" - exit 1 -fi - -echo "Found workers: $DOMAINS" -echo "" - -for domain in $DOMAINS; do - echo "═══ $domain ═══" - ./manage-worker.sh "$domain" restart -done - -echo "✓ Done" \ No newline at end of file diff --git a/deprecated_worker/worker.py b/deprecated_worker/worker.py deleted file mode 100755 index 44d4c22..0000000 --- a/deprecated_worker/worker.py +++ /dev/null @@ -1,885 +0,0 @@ -import os -import sys -import boto3 -import smtplib -import json -import time -import traceback -import signal -from email.parser import BytesParser -from email.policy import SMTP as SMTPPolicy -from datetime import datetime -from botocore.exceptions import ClientError # Neu: Korrekter Import für SES-Exceptions - -# AWS Configuration -AWS_REGION = 'us-east-2' -s3 = boto3.client('s3', region_name=AWS_REGION) -sqs = boto3.client('sqs', region_name=AWS_REGION) -ses = boto3.client('ses', region_name=AWS_REGION) # Neu: Für OOO/Forwards - -# ✨ Worker Configuration (domain-spezifisch) -WORKER_DOMAIN = os.environ.get('WORKER_DOMAIN') # z.B. 'andreasknuth.de' -WORKER_NAME = os.environ.get('WORKER_NAME', f'worker-{WORKER_DOMAIN}') - -# Worker Settings -POLL_INTERVAL = int(os.environ.get('POLL_INTERVAL', '20')) -MAX_MESSAGES = int(os.environ.get('MAX_MESSAGES', '10')) -VISIBILITY_TIMEOUT = int(os.environ.get('VISIBILITY_TIMEOUT', '300')) - -# SMTP Configuration (einfach, da nur 1 Domain pro Worker) -SMTP_HOST = os.environ.get('SMTP_HOST', 'localhost') -SMTP_PORT = int(os.environ.get('SMTP_PORT', '25')) -SMTP_USE_TLS = os.environ.get('SMTP_USE_TLS', 'false').lower() == 'true' -SMTP_USER = os.environ.get('SMTP_USER') -SMTP_PASS = os.environ.get('SMTP_PASS') - -# Graceful shutdown -shutdown_requested = False - -# DynamoDB Ressource für Bounce-Lookup -# DynamoDB Ressource für Bounce-Lookup und Rules -try: - dynamo = boto3.resource('dynamodb', region_name=AWS_REGION) - msg_table = dynamo.Table('ses-outbound-messages') - rules_table = dynamo.Table('email-rules') # Neu: Für OOO/Forwards -except Exception as e: - log(f"Warning: Could not connect to DynamoDB: {e}", 'WARNING') - msg_table = None - rules_table = None - -def get_bucket_name(domain): - """Konvention: domain.tld -> domain-tld-emails""" - return domain.replace('.', '-') + '-emails' - -def is_ses_bounce_notification(parsed): - """ - Prüft ob Email von SES MAILER-DAEMON ist - """ - from_h = (parsed.get('From') or '').lower() - return 'mailer-daemon@us-east-2.amazonses.com' in from_h - - -def get_bounce_info_from_dynamodb(message_id, max_retries=3, retry_delay=1): - """ - Sucht Bounce-Info in DynamoDB anhand der Message-ID - Mit Retry-Logik für Timing-Issues - Returns: dict mit bounce info oder None - """ - import time - - for attempt in range(max_retries): - try: - response = msg_table.get_item(Key={'MessageId': message_id}) - item = response.get('Item') - - if item: - # Gefunden! - return { - 'original_source': item.get('original_source', ''), - 'bounceType': item.get('bounceType', 'Unknown'), - 'bounceSubType': item.get('bounceSubType', 'Unknown'), - 'bouncedRecipients': item.get('bouncedRecipients', []), - 'timestamp': item.get('timestamp', '') - } - - # Nicht gefunden - Retry falls nicht letzter Versuch - if attempt < max_retries - 1: - log(f" Bounce record not found yet, retrying in {retry_delay}s (attempt {attempt + 1}/{max_retries})...") - time.sleep(retry_delay) - else: - log(f"⚠ No bounce record found after {max_retries} attempts for Message-ID: {message_id}") - return None - - except Exception as e: - log(f"⚠ DynamoDB Error (attempt {attempt + 1}/{max_retries}): {e}", 'ERROR') - if attempt < max_retries - 1: - time.sleep(retry_delay) - else: - return None - - return None - - -def apply_bounce_logic(parsed, subject): - """ - Prüft auf SES Bounce, sucht in DynamoDB und schreibt Header um. - Returns: (parsed_email_object, was_modified_bool) - """ - if not is_ses_bounce_notification(parsed): - return parsed, False - - log("🔍 Detected SES MAILER-DAEMON bounce notification") - - # Message-ID aus Header extrahieren - message_id = (parsed.get('Message-ID') or '').strip('<>').split('@')[0] - - if not message_id: - log("⚠ Could not extract Message-ID from bounce notification") - return parsed, False - - log(f" Looking up Message-ID: {message_id}") - - # Lookup in DynamoDB - bounce_info = get_bounce_info_from_dynamodb(message_id) - - if not bounce_info: - return parsed, False - - # Bounce Info ausgeben - original_source = bounce_info['original_source'] - bounced_recipients = bounce_info['bouncedRecipients'] - bounce_type = bounce_info['bounceType'] - bounce_subtype = bounce_info['bounceSubType'] - - log(f"✓ Found bounce info:") - log(f" Original sender: {original_source}") - log(f" Bounce type: {bounce_type}/{bounce_subtype}") - log(f" Bounced recipients: {bounced_recipients}") - - # Nehme den ersten bounced recipient als neuen Absender - # (bei Multiple Recipients kann es mehrere geben) - if bounced_recipients: - new_from = bounced_recipients[0] - - # Rewrite Headers - parsed['X-Original-SES-From'] = parsed.get('From', '') - parsed['X-Bounce-Type'] = f"{bounce_type}/{bounce_subtype}" - parsed.replace_header('From', new_from) - - if not parsed.get('Reply-To'): - parsed['Reply-To'] = new_from - - # Subject anpassen - if 'delivery status notification' in subject.lower() or 'thanks for your submission' in subject.lower(): - parsed.replace_header('Subject', f"Delivery Status: {new_from}") - - log(f"✓ Rewritten FROM: {new_from}") - return parsed, True - - log("⚠ No bounced recipients found in bounce info") - return parsed, False - -def signal_handler(signum, frame): - global shutdown_requested - print(f"\n⚠ Shutdown signal received (signal {signum})") - shutdown_requested = True - - -signal.signal(signal.SIGTERM, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - - -def log(message: str, level: str = 'INFO'): - """Structured logging with timestamp""" - timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - print(f"[{timestamp}] [{level}] [{WORKER_NAME}] {message}", flush=True) - - -def domain_to_queue_name(domain: str) -> str: - """Konvertiert Domain zu SQS Queue Namen""" - return domain.replace('.', '-') + '-queue' - - -def get_queue_url() -> str: - """Ermittelt Queue-URL für die konfigurierte Domain""" - queue_name = domain_to_queue_name(WORKER_DOMAIN) - - try: - response = sqs.get_queue_url(QueueName=queue_name) - return response['QueueUrl'] - except Exception as e: - raise Exception(f"Failed to get queue URL for {WORKER_DOMAIN}: {e}") - - -def mark_as_processed(bucket: str, key: str, invalid_inboxes: list = None): - """ - Markiert E-Mail als erfolgreich zugestellt - Wird nur aufgerufen wenn mindestens 1 Recipient erfolgreich war - """ - try: - head = s3.head_object(Bucket=bucket, Key=key) - metadata = head.get('Metadata', {}) or {} - - metadata['processed'] = 'true' - metadata['processed_at'] = str(int(time.time())) - metadata['processed_by'] = WORKER_NAME - metadata['status'] = 'delivered' - metadata.pop('processing_started', None) - metadata.pop('queued_at', None) - - # Invalid inboxes speichern falls vorhanden - if invalid_inboxes: - metadata['invalid_inboxes'] = ','.join(invalid_inboxes) - log(f"⚠ Invalid inboxes recorded: {', '.join(invalid_inboxes)}", 'WARNING') - - s3.copy_object( - Bucket=bucket, - Key=key, - CopySource={'Bucket': bucket, 'Key': key}, - Metadata=metadata, - MetadataDirective='REPLACE' - ) - - log(f"✓ Marked s3://{bucket}/{key} as processed", 'SUCCESS') - - except Exception as e: - log(f"Failed to mark as processed: {e}", 'WARNING') - - -def mark_as_all_invalid(bucket: str, key: str, invalid_inboxes: list): - """ - Markiert E-Mail als fehlgeschlagen weil alle Recipients ungültig sind - """ - try: - head = s3.head_object(Bucket=bucket, Key=key) - metadata = head.get('Metadata', {}) or {} - - metadata['processed'] = 'true' - metadata['processed_at'] = str(int(time.time())) - metadata['processed_by'] = WORKER_NAME - metadata['status'] = 'failed' - metadata['error'] = 'All recipients are invalid (mailboxes do not exist)' - metadata['invalid_inboxes'] = ','.join(invalid_inboxes) - metadata.pop('processing_started', None) - metadata.pop('queued_at', None) - - s3.copy_object( - Bucket=bucket, - Key=key, - CopySource={'Bucket': bucket, 'Key': key}, - Metadata=metadata, - MetadataDirective='REPLACE' - ) - - log(f"✓ Marked s3://{bucket}/{key} as failed (all invalid)", 'SUCCESS') - - except Exception as e: - log(f"Failed to mark as all invalid: {e}", 'WARNING') - - -def mark_as_failed(bucket: str, key: str, error: str, receive_count: int): - """ - Markiert E-Mail als komplett fehlgeschlagen - Wird nur aufgerufen wenn ALLE Recipients fehlschlagen - """ - try: - head = s3.head_object(Bucket=bucket, Key=key) - metadata = head.get('Metadata', {}) or {} - - metadata['status'] = 'failed' - metadata['failed_at'] = str(int(time.time())) - metadata['failed_by'] = WORKER_NAME - metadata['error'] = error[:500] # S3 Metadata limit - metadata['retry_count'] = str(receive_count) - metadata.pop('processing_started', None) - - s3.copy_object( - Bucket=bucket, - Key=key, - CopySource={'Bucket': bucket, 'Key': key}, - Metadata=metadata, - MetadataDirective='REPLACE' - ) - - log(f"✗ Marked s3://{bucket}/{key} as failed: {error[:100]}", 'ERROR') - - except Exception as e: - log(f"Failed to mark as failed: {e}", 'WARNING') - - -def is_temporary_smtp_error(error_msg: str) -> bool: - """ - Prüft ob SMTP-Fehler temporär ist (Retry sinnvoll) - 4xx Codes = temporär, 5xx = permanent - """ - temporary_indicators = [ - '421', # Service not available - '450', # Mailbox unavailable - '451', # Local error - '452', # Insufficient storage - '4', # Generisch 4xx - 'timeout', - 'connection refused', - 'connection reset', - 'network unreachable', - 'temporarily', - 'try again' - ] - - error_lower = error_msg.lower() - return any(indicator in error_lower for indicator in temporary_indicators) - - -def is_permanent_recipient_error(error_msg: str) -> bool: - """ - Prüft ob Fehler permanent für diesen Recipient ist (Inbox existiert nicht) - 550 = Mailbox not found, 551 = User not local, 553 = Mailbox name invalid - """ - permanent_indicators = [ - '550', # Mailbox unavailable / not found - '551', # User not local - '553', # Mailbox name not allowed / invalid - 'mailbox not found', - 'user unknown', - 'no such user', - 'recipient rejected', - 'does not exist', - 'invalid recipient', - 'unknown user' - ] - - error_lower = error_msg.lower() - return any(indicator in error_lower for indicator in permanent_indicators) - - -def send_email(from_addr: str, recipient: str, raw_message: bytes) -> tuple: - """ - Sendet E-Mail via SMTP an EINEN Empfänger - Returns: (success: bool, error: str or None, is_permanent: bool) - """ - - try: - with smtplib.SMTP(SMTP_HOST, SMTP_PORT, timeout=30) as smtp: - smtp.ehlo() - - # STARTTLS falls konfiguriert - if SMTP_USE_TLS: - try: - smtp.starttls() - smtp.ehlo() - except Exception as e: - log(f" STARTTLS failed: {e}", 'WARNING') - - # Authentication falls konfiguriert - if SMTP_USER and SMTP_PASS: - try: - smtp.login(SMTP_USER, SMTP_PASS) - except Exception as e: - log(f" SMTP auth failed: {e}", 'WARNING') - - # E-Mail senden - result = smtp.sendmail(from_addr, [recipient], raw_message) - - # Result auswerten - if isinstance(result, dict) and result: - # Empfänger wurde abgelehnt - error = result.get(recipient, 'Unknown refusal') - is_permanent = is_permanent_recipient_error(str(error)) - log(f" ✗ {recipient}: {error} ({'permanent' if is_permanent else 'temporary'})", 'ERROR') - return False, str(error), is_permanent - else: - # Erfolgreich - log(f" ✓ {recipient}: Delivered", 'SUCCESS') - return True, None, False - - except smtplib.SMTPException as e: - error_msg = str(e) - is_permanent = is_permanent_recipient_error(error_msg) - log(f" ✗ {recipient}: SMTP error - {error_msg}", 'ERROR') - return False, error_msg, is_permanent - - except Exception as e: - # Connection errors sind immer temporär - log(f" ✗ {recipient}: Connection error - {e}", 'ERROR') - return False, str(e), False - -def extract_body_parts(parsed): - """ - Extrahiert sowohl text/plain als auch text/html Body-Parts. - Returns: (text_body: str, html_body: str or None) - """ - text_body = '' - html_body = None - - if parsed.is_multipart(): - for part in parsed.walk(): - content_type = part.get_content_type() - - if content_type == 'text/plain': - try: - text_body += part.get_payload(decode=True).decode('utf-8', errors='ignore') - except Exception as e: - log(f"⚠ Error decoding text/plain part: {e}", 'WARNING') - - elif content_type == 'text/html': - try: - html_body = part.get_payload(decode=True).decode('utf-8', errors='ignore') - except Exception as e: - log(f"⚠ Error decoding text/html part: {e}", 'WARNING') - else: - try: - payload = parsed.get_payload(decode=True) - if payload: - decoded = payload.decode('utf-8', errors='ignore') - if parsed.get_content_type() == 'text/html': - html_body = decoded - else: - text_body = decoded - except Exception as e: - log(f"⚠ Error decoding non-multipart body: {e}", 'WARNING') - text_body = str(parsed.get_payload()) - - return text_body.strip() if text_body else '(No body content)', html_body - - -def create_ooo_reply(original_parsed, recipient, ooo_msg, content_type='text'): - """ - Erstellt eine Out-of-Office Reply als komplette MIME-Message. - Behält Original-Body (text + html) bei. - """ - from email.mime.multipart import MIMEMultipart - from email.mime.text import MIMEText - from email.utils import formatdate, make_msgid - - text_body, html_body = extract_body_parts(original_parsed) - original_subject = original_parsed.get('Subject', '(no subject)') - original_from = original_parsed.get('From', 'unknown') - - # Neue Message erstellen - msg = MIMEMultipart('mixed') - msg['From'] = recipient - msg['To'] = original_from - msg['Subject'] = f"Out of Office: {original_subject}" - msg['Date'] = formatdate(localtime=True) - msg['Message-ID'] = make_msgid(domain=recipient.split('@')[1]) - msg['In-Reply-To'] = original_parsed.get('Message-ID', '') - msg['References'] = original_parsed.get('Message-ID', '') - msg['Auto-Submitted'] = 'auto-replied' # Verhindert Loops - - # Body-Teil erstellen - body_part = MIMEMultipart('alternative') - - # Text-Version - text_content = f"{ooo_msg}\n\n" - text_content += "--- Original Message ---\n" - text_content += f"From: {original_from}\n" - text_content += f"Subject: {original_subject}\n\n" - text_content += text_body - body_part.attach(MIMEText(text_content, 'plain', 'utf-8')) - - # HTML-Version (wenn gewünscht und Original vorhanden) - if content_type == 'html' or html_body: - html_content = f"
{ooo_msg}



" - html_content += "
" - html_content += f"Original Message
" - html_content += f"From: {original_from}
" - html_content += f"Subject: {original_subject}

" - html_content += (html_body if html_body else text_body.replace('\n', '
')) - html_content += "
" - body_part.attach(MIMEText(html_content, 'html', 'utf-8')) - - msg.attach(body_part) - return msg - - -def create_forward_message(original_parsed, recipient, forward_to, original_from): - """ - Erstellt eine Forward-Message als komplette MIME-Message. - Behält ALLE Original-Parts inkl. Attachments bei. - """ - from email.mime.multipart import MIMEMultipart - from email.mime.text import MIMEText - from email.utils import formatdate, make_msgid - - original_subject = original_parsed.get('Subject', '(no subject)') - original_date = original_parsed.get('Date', 'unknown') - - # Neue Message erstellen - msg = MIMEMultipart('mixed') - msg['From'] = recipient - msg['To'] = forward_to - msg['Subject'] = f"FWD: {original_subject}" - msg['Date'] = formatdate(localtime=True) - msg['Message-ID'] = make_msgid(domain=recipient.split('@')[1]) - msg['Reply-To'] = original_from - - # Forward-Header als Text - text_body, html_body = extract_body_parts(original_parsed) - - # Body-Teil - body_part = MIMEMultipart('alternative') - - # Text-Version - fwd_text = "---------- Forwarded message ---------\n" - fwd_text += f"From: {original_from}\n" - fwd_text += f"Date: {original_date}\n" - fwd_text += f"Subject: {original_subject}\n" - fwd_text += f"To: {recipient}\n\n" - fwd_text += text_body - body_part.attach(MIMEText(fwd_text, 'plain', 'utf-8')) - - # HTML-Version - if html_body: - fwd_html = "
" - fwd_html += "---------- Forwarded message ---------
" - fwd_html += f"From: {original_from}
" - fwd_html += f"Date: {original_date}
" - fwd_html += f"Subject: {original_subject}
" - fwd_html += f"To: {recipient}

" - fwd_html += html_body - fwd_html += "
" - body_part.attach(MIMEText(fwd_html, 'html', 'utf-8')) - - msg.attach(body_part) - - # WICHTIG: Attachments kopieren - if original_parsed.is_multipart(): - for part in original_parsed.walk(): - # Nur non-body parts (Attachments) - if part.get_content_maintype() == 'multipart': - continue - if part.get_content_type() in ['text/plain', 'text/html']: - continue # Body bereits oben behandelt - - # Attachment hinzufügen - msg.attach(part) - - return msg - - -# ========================================== -# HAUPTFUNKTION: PROCESS MESSAGE -# ========================================== - -def process_message(message_body: dict, receive_count: int) -> bool: - """ - Verarbeitet eine E-Mail aus der Queue (SNS-wrapped SES Notification) - Returns: True (Erfolg/Löschen), False (Retry/Behalten) - """ - try: - # 1. UNPACKING (SNS -> SES) - # SQS Body ist JSON. Darin ist meist 'Type': 'Notification' und 'Message': '...JSONString...' - if 'Message' in message_body and 'Type' in message_body: - # Es ist eine SNS Notification - sns_content = message_body['Message'] - if isinstance(sns_content, str): - ses_msg = json.loads(sns_content) - else: - ses_msg = sns_content - else: - # Fallback: Vielleicht doch direkt SES (Legacy support) - ses_msg = message_body - - # 2. DATEN EXTRAHIEREN - mail = ses_msg.get('mail', {}) - receipt = ses_msg.get('receipt', {}) - - message_id = mail.get('messageId') # Das ist der S3 Key! - # FIX: Amazon SES Setup Notification ignorieren - if message_id == "AMAZON_SES_SETUP_NOTIFICATION": - log("ℹ️ Received Amazon SES Setup Notification. Ignoring.", 'INFO') - return True # Erfolgreich (löschen), da kein Fehler - from_addr = mail.get('source') - recipients = receipt.get('recipients', []) - - # S3 Key Validation - if not message_id: - log("❌ Error: No messageId in event payload", 'ERROR') - return True # Löschen, da unbrauchbar - - # Domain Validation - # Wir nehmen den ersten Empfänger um die Domain zu prüfen - if recipients: - first_recipient = recipients[0] - domain = first_recipient.split('@')[1] - - if domain.lower() != WORKER_DOMAIN.lower(): - log(f"⚠ Security: Ignored message for {domain} (I am worker for {WORKER_DOMAIN})", 'WARNING') - return True # Löschen, gehört nicht hierher - else: - log("⚠ Warning: No recipients in event", 'WARNING') - return True - - # Bucket Name ableiten - bucket = get_bucket_name(WORKER_DOMAIN) - key = message_id - - log(f"\n{'='*70}") - log(f"Processing Email (SNS/SES):") - log(f" ID: {key}") - log(f" Recipients: {len(recipients)} -> {recipients}") - log(f" Bucket: {bucket}") - - # 3. LADEN AUS S3 - try: - response = s3.get_object(Bucket=bucket, Key=key) - raw_bytes = response['Body'].read() - log(f"✓ Loaded {len(raw_bytes)} bytes from S3") - except s3.exceptions.NoSuchKey: - # Race Condition: SNS war schneller als S3. - # Wir geben False zurück, damit SQS es in 30s nochmal versucht. - if receive_count < 5: - log(f"⏳ S3 Object not found yet (Attempt {receive_count}). Retrying...", 'WARNING') - return False - else: - log(f"❌ S3 Object missing permanently after retries.", 'ERROR') - return True # Löschen - except Exception as e: - log(f"❌ S3 Download Error: {e}", 'ERROR') - return False # Retry - - # 4. PARSING & BOUNCE LOGIC - try: - parsed = BytesParser(policy=SMTPPolicy).parsebytes(raw_bytes) - subject = parsed.get('Subject', '(no subject)') - - # Hier passiert die Magie: Bounce Header umschreiben - parsed, modified = apply_bounce_logic(parsed, subject) - - if modified: - log(" ✨ Bounce detected & headers rewritten via DynamoDB") - # Wir arbeiten mit den modifizierten Bytes weiter - raw_bytes = parsed.as_bytes() - from_addr_final = parsed.get('From') # Neuer Absender für SMTP Envelope - else: - from_addr_final = from_addr # Original Envelope Sender - - except Exception as e: - log(f"⚠ Parsing/Logic Error: {e}. Sending original.", 'WARNING') - from_addr_final = from_addr - - # 5. OOO & FORWARD LOGIC (neu, vor SMTP-Versand) - if rules_table and not is_ses_bounce_notification(parsed): - for recipient in recipients: - try: - rule = rules_table.get_item(Key={'email_address': recipient}).get('Item', {}) - - # OOO handling - if rule.get('ooo_active', False): - ooo_msg = rule.get('ooo_message', 'Default OOO message.') - content_type = rule.get('ooo_content_type', 'text') - sender = parsed.get('From') - - try: - # Erstelle komplette MIME-Message - ooo_reply = create_ooo_reply(parsed, recipient, ooo_msg, content_type) - - # Sende via send_raw_email (unterstützt komplexe MIME) - ses.send_raw_email( - Source=recipient, - Destinations=[sender], - RawMessage={'Data': ooo_reply.as_bytes()} - ) - log(f"✓ Sent OOO reply to {sender} from {recipient}") - - except ClientError as e: - error_code = e.response['Error']['Code'] - log(f"⚠ SES OOO send failed ({error_code}): {e}", 'ERROR') - - # Forward handling - forwards = rule.get('forwards', []) - if forwards: - original_from = parsed.get('From') - - for forward_to in forwards: - try: - # Erstelle komplette Forward-Message mit Attachments - fwd_msg = create_forward_message(parsed, recipient, forward_to, original_from) - - # Sende via send_raw_email - ses.send_raw_email( - Source=recipient, - Destinations=[forward_to], - RawMessage={'Data': fwd_msg.as_bytes()} - ) - log(f"✓ Forwarded to {forward_to} from {recipient} (original: {original_from})") - - except ClientError as e: - error_code = e.response['Error']['Code'] - log(f"⚠ SES forward failed to {forward_to} ({error_code}): {e}", 'ERROR') - - except ClientError as e: - error_code = e.response['Error']['Code'] - if error_code == 'MessageRejected': - log(f"⚠ SES rejected send for {recipient}: Check verification/quotas.", 'ERROR') - elif error_code == 'AccessDenied': - log(f"⚠ SES AccessDenied for {recipient}: Check IAM policy.", 'ERROR') - else: - log(f"⚠ SES error for {recipient}: {e}", 'ERROR') - except Exception as e: - log(f"⚠ Rule processing error for {recipient}: {e}", 'WARNING') - traceback.print_exc() - - # 6. SMTP VERSAND (Loop über Recipients) - log(f"📤 Sending to {len(recipients)} recipient(s)...") - - successful = [] - failed_permanent = [] - failed_temporary = [] - - for recipient in recipients: - # Wir nutzen raw_bytes (ggf. modifiziert) - # WICHTIG: Als Envelope Sender nutzen wir 'from_addr_final' - # (bei Bounces ist das der Original-Empfänger, sonst der SES Sender) - success, error, is_perm = send_email(from_addr_final, recipient, raw_bytes) - - if success: - successful.append(recipient) - elif is_perm: - failed_permanent.append(recipient) - else: - failed_temporary.append(recipient) - - # 6. RESULTAT & CLEANUP - log(f"📊 Results: {len(successful)} OK, {len(failed_temporary)} TempFail, {len(failed_permanent)} PermFail") - - if len(successful) > 0: - # Mindestens einer durchgegangen -> Erfolg - mark_as_processed(bucket, key, failed_permanent if failed_permanent else None) - log(f"✅ Success. Deleted from queue.") - return True - - elif len(failed_permanent) == len(recipients): - # Alle permanent fehlgeschlagen (User unknown) -> Löschen - mark_as_all_invalid(bucket, key, failed_permanent) - log(f"🛑 All recipients invalid. Deleted from queue.") - return True - - else: - # Temporäre Fehler -> Retry - log(f"🔄 Temporary failures. Keeping in queue.") - return False - - except Exception as e: - log(f"❌ CRITICAL WORKER ERROR: {e}", 'ERROR') - traceback.print_exc() - return False # Retry (außer es crasht immer wieder) - - -def main_loop(): - """Hauptschleife: Pollt SQS Queue und verarbeitet Nachrichten""" - - # Queue URL ermitteln - try: - queue_url = get_queue_url() - except Exception as e: - log(f"FATAL: {e}", 'ERROR') - sys.exit(1) - - log(f"\n{'='*70}") - log(f"🚀 Email Worker started") - log(f"{'='*70}") - log(f" Worker Name: {WORKER_NAME}") - log(f" Domain: {WORKER_DOMAIN}") - log(f" Queue: {queue_url}") - log(f" Region: {AWS_REGION}") - log(f" SMTP: {SMTP_HOST}:{SMTP_PORT} (TLS: {SMTP_USE_TLS})") - log(f" Poll interval: {POLL_INTERVAL}s") - log(f" Max messages per poll: {MAX_MESSAGES}") - log(f" Visibility timeout: {VISIBILITY_TIMEOUT}s") - log(f"{'='*70}\n") - - consecutive_errors = 0 - max_consecutive_errors = 10 - messages_processed = 0 - last_activity = time.time() - - while not shutdown_requested: - try: - # Messages aus Queue holen (Long Polling) - response = sqs.receive_message( - QueueUrl=queue_url, - MaxNumberOfMessages=MAX_MESSAGES, - WaitTimeSeconds=POLL_INTERVAL, - VisibilityTimeout=VISIBILITY_TIMEOUT, - AttributeNames=['ApproximateReceiveCount', 'SentTimestamp'], - MessageAttributeNames=['All'] - ) - - # Reset error counter bei erfolgreicher Abfrage - consecutive_errors = 0 - - if 'Messages' not in response: - # Keine Nachrichten - if time.time() - last_activity > 60: - log(f"Waiting for messages... (processed: {messages_processed})") - last_activity = time.time() - continue - - message_count = len(response['Messages']) - log(f"\n✉ Received {message_count} message(s) from queue") - last_activity = time.time() - - # Messages verarbeiten - for msg in response['Messages']: - if shutdown_requested: - log("Shutdown requested, stopping processing") - break - - receipt_handle = msg['ReceiptHandle'] - - # Receive Count auslesen - receive_count = int(msg.get('Attributes', {}).get('ApproximateReceiveCount', 1)) - - # Sent Timestamp (für Queue-Zeit-Berechnung) - sent_timestamp = int(msg.get('Attributes', {}).get('SentTimestamp', 0)) / 1000 - queue_time = int(time.time() - sent_timestamp) if sent_timestamp else 0 - - if queue_time > 0: - log(f"Message was in queue for {queue_time}s") - - try: - message_body = json.loads(msg['Body']) - - # E-Mail verarbeiten - success = process_message(message_body, receive_count) - - if success: - # Message aus Queue löschen - sqs.delete_message( - QueueUrl=queue_url, - ReceiptHandle=receipt_handle - ) - log("✓ Message deleted from queue") - messages_processed += 1 - else: - # Bei Fehler bleibt Message in Queue - log(f"⚠ Message kept in queue for retry (attempt {receive_count}/3)") - - except json.JSONDecodeError as e: - log(f"✗ Invalid message format: {e}", 'ERROR') - # Ungültige Messages löschen (nicht retryable) - sqs.delete_message( - QueueUrl=queue_url, - ReceiptHandle=receipt_handle - ) - - except Exception as e: - log(f"✗ Error processing message: {e}", 'ERROR') - traceback.print_exc() - # Message bleibt in Queue für Retry - - except KeyboardInterrupt: - log("\n⚠ Keyboard interrupt received") - break - - except Exception as e: - consecutive_errors += 1 - log(f"✗ Error in main loop ({consecutive_errors}/{max_consecutive_errors}): {e}", 'ERROR') - traceback.print_exc() - - if consecutive_errors >= max_consecutive_errors: - log("Too many consecutive errors, shutting down", 'ERROR') - break - - # Kurze Pause bei Fehlern - time.sleep(5) - - log(f"\n{'='*70}") - log(f"👋 Worker shutting down") - log(f" Messages processed: {messages_processed}") - log(f"{'='*70}\n") - - -if __name__ == '__main__': - # Validierung - if not WORKER_DOMAIN: - log("ERROR: WORKER_DOMAIN not set!", 'ERROR') - sys.exit(1) - - try: - main_loop() - except Exception as e: - log(f"Fatal error: {e}", 'ERROR') - traceback.print_exc() - sys.exit(1) \ No newline at end of file diff --git a/deprecated_worker/worker.py.old b/deprecated_worker/worker.py.old deleted file mode 100644 index ee57a74..0000000 --- a/deprecated_worker/worker.py.old +++ /dev/null @@ -1,520 +0,0 @@ -import os -import sys -import boto3 -import smtplib -import json -import time -import traceback -import signal -from email.parser import BytesParser -from email.policy import SMTP as SMTPPolicy -from datetime import datetime - -# AWS Configuration -AWS_REGION = 'us-east-2' -s3 = boto3.client('s3', region_name=AWS_REGION) -sqs = boto3.client('sqs', region_name=AWS_REGION) - -# ✨ Worker Configuration (domain-spezifisch) -WORKER_DOMAIN = os.environ.get('WORKER_DOMAIN') # z.B. 'andreasknuth.de' -WORKER_NAME = os.environ.get('WORKER_NAME', f'worker-{WORKER_DOMAIN}') - -# Worker Settings -POLL_INTERVAL = int(os.environ.get('POLL_INTERVAL', '20')) -MAX_MESSAGES = int(os.environ.get('MAX_MESSAGES', '10')) -VISIBILITY_TIMEOUT = int(os.environ.get('VISIBILITY_TIMEOUT', '300')) - -# SMTP Configuration (einfach, da nur 1 Domain pro Worker) -SMTP_HOST = os.environ.get('SMTP_HOST', 'localhost') -SMTP_PORT = int(os.environ.get('SMTP_PORT', '25')) -SMTP_USE_TLS = os.environ.get('SMTP_USE_TLS', 'false').lower() == 'true' -SMTP_USER = os.environ.get('SMTP_USER') -SMTP_PASS = os.environ.get('SMTP_PASS') - -# Graceful shutdown -shutdown_requested = False - - -def signal_handler(signum, frame): - global shutdown_requested - print(f"\n⚠ Shutdown signal received (signal {signum})") - shutdown_requested = True - - -signal.signal(signal.SIGTERM, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - - -def log(message: str, level: str = 'INFO'): - """Structured logging with timestamp""" - timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - print(f"[{timestamp}] [{level}] [{WORKER_NAME}] {message}", flush=True) - - -def domain_to_queue_name(domain: str) -> str: - """Konvertiert Domain zu SQS Queue Namen""" - return domain.replace('.', '-') + '-queue' - - -def get_queue_url() -> str: - """Ermittelt Queue-URL für die konfigurierte Domain""" - queue_name = domain_to_queue_name(WORKER_DOMAIN) - - try: - response = sqs.get_queue_url(QueueName=queue_name) - return response['QueueUrl'] - except Exception as e: - raise Exception(f"Failed to get queue URL for {WORKER_DOMAIN}: {e}") - - -def mark_as_processed(bucket: str, key: str, invalid_inboxes: list = None): - """ - Markiert E-Mail als erfolgreich zugestellt - Wird nur aufgerufen wenn mindestens 1 Recipient erfolgreich war - """ - try: - head = s3.head_object(Bucket=bucket, Key=key) - metadata = head.get('Metadata', {}) or {} - - metadata['processed'] = 'true' - metadata['processed_at'] = str(int(time.time())) - metadata['processed_by'] = WORKER_NAME - metadata['status'] = 'delivered' - metadata.pop('processing_started', None) - metadata.pop('queued_at', None) - - # Invalid inboxes speichern falls vorhanden - if invalid_inboxes: - metadata['invalid_inboxes'] = ','.join(invalid_inboxes) - log(f"⚠ Invalid inboxes recorded: {', '.join(invalid_inboxes)}", 'WARNING') - - s3.copy_object( - Bucket=bucket, - Key=key, - CopySource={'Bucket': bucket, 'Key': key}, - Metadata=metadata, - MetadataDirective='REPLACE' - ) - - log(f"✓ Marked s3://{bucket}/{key} as processed", 'SUCCESS') - - except Exception as e: - log(f"Failed to mark as processed: {e}", 'WARNING') - - -def mark_as_all_invalid(bucket: str, key: str, invalid_inboxes: list): - """ - Markiert E-Mail als fehlgeschlagen weil alle Recipients ungültig sind - """ - try: - head = s3.head_object(Bucket=bucket, Key=key) - metadata = head.get('Metadata', {}) or {} - - metadata['processed'] = 'true' - metadata['processed_at'] = str(int(time.time())) - metadata['processed_by'] = WORKER_NAME - metadata['status'] = 'failed' - metadata['error'] = 'All recipients are invalid (mailboxes do not exist)' - metadata['invalid_inboxes'] = ','.join(invalid_inboxes) - metadata.pop('processing_started', None) - metadata.pop('queued_at', None) - - s3.copy_object( - Bucket=bucket, - Key=key, - CopySource={'Bucket': bucket, 'Key': key}, - Metadata=metadata, - MetadataDirective='REPLACE' - ) - - log(f"✓ Marked s3://{bucket}/{key} as failed (all invalid)", 'SUCCESS') - - except Exception as e: - log(f"Failed to mark as all invalid: {e}", 'WARNING') - - -def mark_as_failed(bucket: str, key: str, error: str, receive_count: int): - """ - Markiert E-Mail als komplett fehlgeschlagen - Wird nur aufgerufen wenn ALLE Recipients fehlschlagen - """ - try: - head = s3.head_object(Bucket=bucket, Key=key) - metadata = head.get('Metadata', {}) or {} - - metadata['status'] = 'failed' - metadata['failed_at'] = str(int(time.time())) - metadata['failed_by'] = WORKER_NAME - metadata['error'] = error[:500] # S3 Metadata limit - metadata['retry_count'] = str(receive_count) - metadata.pop('processing_started', None) - - s3.copy_object( - Bucket=bucket, - Key=key, - CopySource={'Bucket': bucket, 'Key': key}, - Metadata=metadata, - MetadataDirective='REPLACE' - ) - - log(f"✗ Marked s3://{bucket}/{key} as failed: {error[:100]}", 'ERROR') - - except Exception as e: - log(f"Failed to mark as failed: {e}", 'WARNING') - - -def is_temporary_smtp_error(error_msg: str) -> bool: - """ - Prüft ob SMTP-Fehler temporär ist (Retry sinnvoll) - 4xx Codes = temporär, 5xx = permanent - """ - temporary_indicators = [ - '421', # Service not available - '450', # Mailbox unavailable - '451', # Local error - '452', # Insufficient storage - '4', # Generisch 4xx - 'timeout', - 'connection refused', - 'connection reset', - 'network unreachable', - 'temporarily', - 'try again' - ] - - error_lower = error_msg.lower() - return any(indicator in error_lower for indicator in temporary_indicators) - - -def is_permanent_recipient_error(error_msg: str) -> bool: - """ - Prüft ob Fehler permanent für diesen Recipient ist (Inbox existiert nicht) - 550 = Mailbox not found, 551 = User not local, 553 = Mailbox name invalid - """ - permanent_indicators = [ - '550', # Mailbox unavailable / not found - '551', # User not local - '553', # Mailbox name not allowed / invalid - 'mailbox not found', - 'user unknown', - 'no such user', - 'recipient rejected', - 'does not exist', - 'invalid recipient', - 'unknown user' - ] - - error_lower = error_msg.lower() - return any(indicator in error_lower for indicator in permanent_indicators) - - -def send_email(from_addr: str, recipient: str, raw_message: bytes) -> tuple: - """ - Sendet E-Mail via SMTP an EINEN Empfänger - Returns: (success: bool, error: str or None, is_permanent: bool) - """ - - try: - with smtplib.SMTP(SMTP_HOST, SMTP_PORT, timeout=30) as smtp: - smtp.ehlo() - - # STARTTLS falls konfiguriert - if SMTP_USE_TLS: - try: - smtp.starttls() - smtp.ehlo() - except Exception as e: - log(f" STARTTLS failed: {e}", 'WARNING') - - # Authentication falls konfiguriert - if SMTP_USER and SMTP_PASS: - try: - smtp.login(SMTP_USER, SMTP_PASS) - except Exception as e: - log(f" SMTP auth failed: {e}", 'WARNING') - - # E-Mail senden - result = smtp.sendmail(from_addr, [recipient], raw_message) - - # Result auswerten - if isinstance(result, dict) and result: - # Empfänger wurde abgelehnt - error = result.get(recipient, 'Unknown refusal') - is_permanent = is_permanent_recipient_error(str(error)) - log(f" ✗ {recipient}: {error} ({'permanent' if is_permanent else 'temporary'})", 'ERROR') - return False, str(error), is_permanent - else: - # Erfolgreich - log(f" ✓ {recipient}: Delivered", 'SUCCESS') - return True, None, False - - except smtplib.SMTPException as e: - error_msg = str(e) - is_permanent = is_permanent_recipient_error(error_msg) - log(f" ✗ {recipient}: SMTP error - {error_msg}", 'ERROR') - return False, error_msg, is_permanent - - except Exception as e: - # Connection errors sind immer temporär - log(f" ✗ {recipient}: Connection error - {e}", 'ERROR') - return False, str(e), False - - -def process_message(message_body: dict, receive_count: int) -> bool: - """ - Verarbeitet eine E-Mail aus der Queue - Kann mehrere Recipients haben - sendet an alle - Returns: True wenn erfolgreich (Message löschen), False bei Fehler (Retry) - """ - - bucket = message_body['bucket'] - key = message_body['key'] - from_addr = message_body['from'] - recipients = message_body['recipients'] # Liste von Empfängern - domain = message_body['domain'] - subject = message_body.get('subject', '(unknown)') - message_id = message_body.get('message_id', '(unknown)') - - log(f"\n{'='*70}") - log(f"Processing email (Attempt #{receive_count}):") - log(f" MessageId: {message_id}") - log(f" S3 Key: {key}") - log(f" Domain: {domain}") - log(f" From: {from_addr}") - log(f" Recipients: {len(recipients)}") - for recipient in recipients: - log(f" - {recipient}") - log(f" Subject: {subject}") - log(f" S3: s3://{bucket}/{key}") - log(f"{'='*70}") - - # ✨ VALIDATION: Domain muss mit Worker-Domain übereinstimmen - if domain.lower() != WORKER_DOMAIN.lower(): - log(f"ERROR: Wrong domain! Expected {WORKER_DOMAIN}, got {domain}", 'ERROR') - log("This message should not be in this queue! Deleting...", 'ERROR') - return True # Message löschen (gehört nicht hierher) - - # E-Mail aus S3 laden - try: - response = s3.get_object(Bucket=bucket, Key=key) - raw_bytes = response['Body'].read() - log(f"✓ Loaded {len(raw_bytes):,} bytes ({len(raw_bytes)/1024:.1f} KB)") - except s3.exceptions.NoSuchKey: - log(f"✗ S3 object not found (may have been deleted)", 'ERROR') - return True # Nicht retryable - Message löschen - except Exception as e: - log(f"✗ Failed to load from S3: {e}", 'ERROR') - return False # Könnte temporär sein - retry - - # An alle Recipients senden - log(f"\n📤 Sending to {len(recipients)} recipient(s)...") - log(f"Connecting to {SMTP_HOST}:{SMTP_PORT} (TLS: {SMTP_USE_TLS})") - - successful = [] - failed_temporary = [] - failed_permanent = [] - - for recipient in recipients: - success, error, is_permanent = send_email(from_addr, recipient, raw_bytes) - - if success: - successful.append(recipient) - elif is_permanent: - failed_permanent.append(recipient) - else: - failed_temporary.append(recipient) - - # Ergebnis-Zusammenfassung - log(f"\n📊 Delivery Results:") - log(f" ✓ Successful: {len(successful)}/{len(recipients)}") - log(f" ✗ Failed (temporary): {len(failed_temporary)}") - log(f" ✗ Failed (permanent): {len(failed_permanent)}") - - # Entscheidungslogik - if len(successful) > 0: - # ✅ Fall 1: Mindestens 1 Recipient erfolgreich - # → status=delivered, invalid_inboxes tracken - - invalid_inboxes = failed_permanent if failed_permanent else None - mark_as_processed(bucket, key, invalid_inboxes) - - log(f"{'='*70}") - log(f"✅ Email delivered to {len(successful)} recipient(s)", 'SUCCESS') - if failed_permanent: - log(f"⚠ {len(failed_permanent)} invalid inbox(es): {', '.join(failed_permanent)}", 'WARNING') - if failed_temporary: - log(f"⚠ {len(failed_temporary)} temporary failure(s) - NOT retrying (at least 1 success)", 'WARNING') - log(f"{'='*70}\n") - - return True # Message löschen - - elif len(failed_permanent) == len(recipients): - # ❌ Fall 2: ALLE Recipients permanent fehlgeschlagen (alle Inboxen ungültig) - # → status=failed, invalid_inboxes = ALLE - - mark_as_all_invalid(bucket, key, failed_permanent) - - log(f"{'='*70}") - log(f"✗ All recipients are invalid inboxes - NO delivery", 'ERROR') - log(f" Invalid: {', '.join(failed_permanent)}", 'ERROR') - log(f"{'='*70}\n") - - return True # Message löschen (nicht retryable) - - else: - # ⏳ Fall 3: Nur temporäre Fehler, keine erfolgreichen Deliveries - # → Retry wenn noch Versuche übrig - - if receive_count < 3: - log(f"⚠ All failures are temporary, will retry", 'WARNING') - log(f"{'='*70}\n") - return False # Message NICHT löschen → Retry - else: - # Max retries erreicht → als failed markieren - error_summary = f"Failed after {receive_count} attempts. Temporary errors for all recipients." - mark_as_failed(bucket, key, error_summary, receive_count) - - log(f"{'='*70}") - log(f"✗ Email delivery failed permanently after {receive_count} attempts", 'ERROR') - log(f"{'='*70}\n") - - return False # Nach 3 Versuchen → automatisch DLQ - - -def main_loop(): - """Hauptschleife: Pollt SQS Queue und verarbeitet Nachrichten""" - - # Queue URL ermitteln - try: - queue_url = get_queue_url() - except Exception as e: - log(f"FATAL: {e}", 'ERROR') - sys.exit(1) - - log(f"\n{'='*70}") - log(f"🚀 Email Worker started") - log(f"{'='*70}") - log(f" Worker Name: {WORKER_NAME}") - log(f" Domain: {WORKER_DOMAIN}") - log(f" Queue: {queue_url}") - log(f" Region: {AWS_REGION}") - log(f" SMTP: {SMTP_HOST}:{SMTP_PORT} (TLS: {SMTP_USE_TLS})") - log(f" Poll interval: {POLL_INTERVAL}s") - log(f" Max messages per poll: {MAX_MESSAGES}") - log(f" Visibility timeout: {VISIBILITY_TIMEOUT}s") - log(f"{'='*70}\n") - - consecutive_errors = 0 - max_consecutive_errors = 10 - messages_processed = 0 - last_activity = time.time() - - while not shutdown_requested: - try: - # Messages aus Queue holen (Long Polling) - response = sqs.receive_message( - QueueUrl=queue_url, - MaxNumberOfMessages=MAX_MESSAGES, - WaitTimeSeconds=POLL_INTERVAL, - VisibilityTimeout=VISIBILITY_TIMEOUT, - AttributeNames=['ApproximateReceiveCount', 'SentTimestamp'], - MessageAttributeNames=['All'] - ) - - # Reset error counter bei erfolgreicher Abfrage - consecutive_errors = 0 - - if 'Messages' not in response: - # Keine Nachrichten - if time.time() - last_activity > 60: - log(f"Waiting for messages... (processed: {messages_processed})") - last_activity = time.time() - continue - - message_count = len(response['Messages']) - log(f"\n✉ Received {message_count} message(s) from queue") - last_activity = time.time() - - # Messages verarbeiten - for msg in response['Messages']: - if shutdown_requested: - log("Shutdown requested, stopping processing") - break - - receipt_handle = msg['ReceiptHandle'] - - # Receive Count auslesen - receive_count = int(msg.get('Attributes', {}).get('ApproximateReceiveCount', 1)) - - # Sent Timestamp (für Queue-Zeit-Berechnung) - sent_timestamp = int(msg.get('Attributes', {}).get('SentTimestamp', 0)) / 1000 - queue_time = int(time.time() - sent_timestamp) if sent_timestamp else 0 - - if queue_time > 0: - log(f"Message was in queue for {queue_time}s") - - try: - message_body = json.loads(msg['Body']) - - # E-Mail verarbeiten - success = process_message(message_body, receive_count) - - if success: - # Message aus Queue löschen - sqs.delete_message( - QueueUrl=queue_url, - ReceiptHandle=receipt_handle - ) - log("✓ Message deleted from queue") - messages_processed += 1 - else: - # Bei Fehler bleibt Message in Queue - log(f"⚠ Message kept in queue for retry (attempt {receive_count}/3)") - - except json.JSONDecodeError as e: - log(f"✗ Invalid message format: {e}", 'ERROR') - # Ungültige Messages löschen (nicht retryable) - sqs.delete_message( - QueueUrl=queue_url, - ReceiptHandle=receipt_handle - ) - - except Exception as e: - log(f"✗ Error processing message: {e}", 'ERROR') - traceback.print_exc() - # Message bleibt in Queue für Retry - - except KeyboardInterrupt: - log("\n⚠ Keyboard interrupt received") - break - - except Exception as e: - consecutive_errors += 1 - log(f"✗ Error in main loop ({consecutive_errors}/{max_consecutive_errors}): {e}", 'ERROR') - traceback.print_exc() - - if consecutive_errors >= max_consecutive_errors: - log("Too many consecutive errors, shutting down", 'ERROR') - break - - # Kurze Pause bei Fehlern - time.sleep(5) - - log(f"\n{'='*70}") - log(f"👋 Worker shutting down") - log(f" Messages processed: {messages_processed}") - log(f"{'='*70}\n") - - -if __name__ == '__main__': - # Validierung - if not WORKER_DOMAIN: - log("ERROR: WORKER_DOMAIN not set!", 'ERROR') - sys.exit(1) - - try: - main_loop() - except Exception as e: - log(f"Fatal error: {e}", 'ERROR') - traceback.print_exc() - sys.exit(1) \ No newline at end of file diff --git a/email-worker-nodejs/.env.example b/email-worker-nodejs/.env.example new file mode 100644 index 0000000..9ac3fbe --- /dev/null +++ b/email-worker-nodejs/.env.example @@ -0,0 +1,38 @@ +# AWS credentials (or use IAM role / instance profile) +AWS_REGION=us-east-2 +# AWS_ACCESS_KEY_ID= +# AWS_SECRET_ACCESS_KEY= + +# Domains: comma-separated list OR file path +# DOMAINS=andreasknuth.de,bizmatch.net +DOMAINS_FILE=/etc/email-worker/domains.txt + +# SMTP (Docker Mail Server) +SMTP_HOST=localhost +SMTP_PORT=25 +SMTP_USE_TLS=false +SMTP_USER= +SMTP_PASS= +SMTP_POOL_SIZE=5 + +# Internal SMTP port (bypass transport_maps) +INTERNAL_SMTP_PORT=25 + +# Worker settings +WORKER_THREADS=10 +POLL_INTERVAL=20 +MAX_MESSAGES=10 +VISIBILITY_TIMEOUT=300 + +# DynamoDB tables +DYNAMODB_RULES_TABLE=email-rules +DYNAMODB_MESSAGES_TABLE=ses-outbound-messages +DYNAMODB_BLOCKED_TABLE=email-blocked-senders + +# Bounce handling +BOUNCE_LOOKUP_RETRIES=3 +BOUNCE_LOOKUP_DELAY=1.0 + +# Monitoring +METRICS_PORT=8000 +HEALTH_PORT=8080 diff --git a/email-worker-nodejs/Dockerfile b/email-worker-nodejs/Dockerfile new file mode 100644 index 0000000..f31f5c5 --- /dev/null +++ b/email-worker-nodejs/Dockerfile @@ -0,0 +1,34 @@ +# ── Build stage ────────────────────────────────────────────────── +FROM node:20-slim AS builder + +WORKDIR /app + +COPY package.json package-lock.json* ./ +RUN npm ci + +COPY tsconfig.json ./ +COPY src/ ./src/ + +RUN npx tsc + +# ── Run stage ──────────────────────────────────────────────────── +FROM node:20-slim + +WORKDIR /app + +# Only production deps +COPY package.json package-lock.json* ./ +RUN npm ci --omit=dev && npm cache clean --force + +# Compiled JS from build stage +COPY --from=builder /app/dist ./dist + +# Config directory (mount domains.txt here) +RUN mkdir -p /etc/email-worker /var/log/email-worker + +EXPOSE 8000 8080 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD node -e "fetch('http://localhost:8080').then(r => r.ok ? process.exit(0) : process.exit(1)).catch(() => process.exit(1))" + +CMD ["node", "dist/main.js"] diff --git a/email-worker-nodejs/blocklist.ts b/email-worker-nodejs/blocklist.ts new file mode 100644 index 0000000..28f1f0a --- /dev/null +++ b/email-worker-nodejs/blocklist.ts @@ -0,0 +1,62 @@ +/** + * Sender blocklist checking with wildcard / glob support + * + * Uses picomatch for pattern matching (equivalent to Python's fnmatch). + * Patterns are stored per-recipient in DynamoDB. + */ + +import picomatch from 'picomatch'; +import type { DynamoDBHandler } from '../aws/dynamodb.js'; +import { log } from '../logger.js'; + +/** + * Extract the bare email address from a From header value. + * "John Doe " → "john@example.com" + */ +function extractAddress(sender: string): string { + const match = sender.match(/<([^>]+)>/); + const addr = match ? match[1] : sender; + return addr.trim().toLowerCase(); +} + +export class BlocklistChecker { + constructor(private dynamodb: DynamoDBHandler) {} + + /** + * Batch-check whether a sender is blocked for each recipient. + * Uses a single batch DynamoDB call for efficiency. + */ + async batchCheckBlockedSenders( + recipients: string[], + sender: string, + workerName: string, + ): Promise> { + const patternsByRecipient = + await this.dynamodb.batchGetBlockedPatterns(recipients); + + const senderClean = extractAddress(sender); + const result: Record = {}; + + for (const recipient of recipients) { + const patterns = patternsByRecipient[recipient] ?? []; + let isBlocked = false; + + for (const pattern of patterns) { + if (picomatch.isMatch(senderClean, pattern.toLowerCase())) { + log( + `⛔ BLOCKED: Sender ${senderClean} matches pattern '${pattern}' ` + + `for inbox ${recipient}`, + 'WARNING', + workerName, + ); + isBlocked = true; + break; + } + } + + result[recipient] = isBlocked; + } + + return result; + } +} diff --git a/email-worker-nodejs/bounce-handler.ts b/email-worker-nodejs/bounce-handler.ts new file mode 100644 index 0000000..9796824 --- /dev/null +++ b/email-worker-nodejs/bounce-handler.ts @@ -0,0 +1,190 @@ +/** + * Bounce detection and header rewriting + * + * When Amazon SES returns a bounce, the From header is + * mailer-daemon@amazonses.com. We look up the original sender + * in DynamoDB and rewrite the headers so the bounce appears + * to come from the actual bounced recipient. + */ + +import type { ParsedMail } from 'mailparser'; +import type { DynamoDBHandler } from '../aws/dynamodb.js'; +import { isSesBounceNotification, getHeader } from './parser.js'; +import { log } from '../logger.js'; + +export interface BounceResult { + /** Updated raw bytes (headers rewritten if bounce was detected) */ + rawBytes: Buffer; + /** Whether bounce was detected and headers were modified */ + modified: boolean; + /** Whether this email is a bounce notification at all */ + isBounce: boolean; + /** The effective From address (rewritten or original) */ + fromAddr: string; +} + +export class BounceHandler { + constructor(private dynamodb: DynamoDBHandler) {} + + /** + * Detect SES bounce, look up original sender in DynamoDB, + * and rewrite headers in the raw buffer. + * + * We operate on the raw Buffer because we need to preserve + * the original MIME structure exactly, only swapping specific + * header lines. mailparser's ParsedMail is read-only. + */ + async applyBounceLogic( + parsed: ParsedMail, + rawBytes: Buffer, + subject: string, + workerName = 'unified', + ): Promise { + if (!isSesBounceNotification(parsed)) { + return { + rawBytes, + modified: false, + isBounce: false, + fromAddr: parsed.from?.text ?? '', + }; + } + + log('🔍 Detected SES MAILER-DAEMON bounce notification', 'INFO', workerName); + + // Extract Message-ID from the bounce notification header + const rawMessageId = getHeader(parsed, 'message-id') + .replace(/^$/, '') + .split('@')[0]; + + if (!rawMessageId) { + log('⚠ Could not extract Message-ID from bounce notification', 'WARNING', workerName); + return { + rawBytes, + modified: false, + isBounce: true, + fromAddr: parsed.from?.text ?? '', + }; + } + + log(` Looking up Message-ID: ${rawMessageId}`, 'INFO', workerName); + + const bounceInfo = await this.dynamodb.getBounceInfo(rawMessageId, workerName); + if (!bounceInfo) { + return { + rawBytes, + modified: false, + isBounce: true, + fromAddr: parsed.from?.text ?? '', + }; + } + + // Log bounce details + log(`✓ Found bounce info:`, 'INFO', workerName); + log(` Original sender: ${bounceInfo.original_source}`, 'INFO', workerName); + log(` Bounce type: ${bounceInfo.bounceType}/${bounceInfo.bounceSubType}`, 'INFO', workerName); + log(` Bounced recipients: ${bounceInfo.bouncedRecipients}`, 'INFO', workerName); + + if (!bounceInfo.bouncedRecipients.length) { + log('⚠ No bounced recipients found in bounce info', 'WARNING', workerName); + return { + rawBytes, + modified: false, + isBounce: true, + fromAddr: parsed.from?.text ?? '', + }; + } + + const newFrom = bounceInfo.bouncedRecipients[0]; + + // Rewrite headers in raw bytes + let modifiedBytes = rawBytes; + const originalFrom = getHeader(parsed, 'from'); + + // Replace From header + modifiedBytes = replaceHeader(modifiedBytes, 'From', newFrom); + + // Add diagnostic headers + modifiedBytes = addHeader(modifiedBytes, 'X-Original-SES-From', originalFrom); + modifiedBytes = addHeader( + modifiedBytes, + 'X-Bounce-Type', + `${bounceInfo.bounceType}/${bounceInfo.bounceSubType}`, + ); + + // Add Reply-To if not present + if (!getHeader(parsed, 'reply-to')) { + modifiedBytes = addHeader(modifiedBytes, 'Reply-To', newFrom); + } + + // Adjust subject for generic delivery status notifications + const subjectLower = subject.toLowerCase(); + if ( + subjectLower.includes('delivery status notification') || + subjectLower.includes('thanks for your submission') + ) { + modifiedBytes = replaceHeader( + modifiedBytes, + 'Subject', + `Delivery Status: ${newFrom}`, + ); + } + + log(`✓ Rewritten FROM: ${newFrom}`, 'SUCCESS', workerName); + + return { + rawBytes: modifiedBytes, + modified: true, + isBounce: true, + fromAddr: newFrom, + }; + } +} + +// --------------------------------------------------------------------------- +// Raw header manipulation helpers +// --------------------------------------------------------------------------- + +/** + * Replace a header value in raw MIME bytes. + * Handles multi-line (folded) headers. + */ +function replaceHeader(raw: Buffer, name: string, newValue: string): Buffer { + const str = raw.toString('utf-8'); + // Match header including potential folded continuation lines + const regex = new RegExp( + `^(${escapeRegex(name)}:\\s*).*?(\\r?\\n(?=[^ \\t])|\\r?\\n$)`, + 'im', + ); + // Also need to consume folded lines + const foldedRegex = new RegExp( + `^${escapeRegex(name)}:[ \\t]*[^\\r\\n]*(?:\\r?\\n[ \\t]+[^\\r\\n]*)*`, + 'im', + ); + + const match = foldedRegex.exec(str); + if (!match) return raw; + + const before = str.slice(0, match.index); + const after = str.slice(match.index + match[0].length); + const replaced = `${before}${name}: ${newValue}${after}`; + return Buffer.from(replaced, 'utf-8'); +} + +/** + * Add a new header line right before the header/body separator. + */ +function addHeader(raw: Buffer, name: string, value: string): Buffer { + const str = raw.toString('utf-8'); + // Find the header/body boundary (first blank line) + const sep = str.match(/\r?\n\r?\n/); + if (!sep || sep.index === undefined) return raw; + + const before = str.slice(0, sep.index); + const after = str.slice(sep.index); + return Buffer.from(`${before}\r\n${name}: ${value}${after}`, 'utf-8'); +} + +function escapeRegex(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} diff --git a/email-worker-nodejs/config.ts b/email-worker-nodejs/config.ts new file mode 100644 index 0000000..e3ff7e2 --- /dev/null +++ b/email-worker-nodejs/config.ts @@ -0,0 +1,115 @@ +/** + * Configuration management for unified email worker + * + * All settings are read from environment variables with sensible defaults. + * Domain helpers (bucket name, queue name, internal check) are co-located here + * so every module can import { config, domainToBucket, ... } from './config'. + */ + +import { readFileSync, existsSync } from 'node:fs'; + +// --------------------------------------------------------------------------- +// Config object +// --------------------------------------------------------------------------- +export const config = { + // AWS + awsRegion: process.env.AWS_REGION ?? 'us-east-2', + + // Domains + domainsList: process.env.DOMAINS ?? '', + domainsFile: process.env.DOMAINS_FILE ?? '/etc/email-worker/domains.txt', + + // Worker + workerThreads: parseInt(process.env.WORKER_THREADS ?? '10', 10), + pollInterval: parseInt(process.env.POLL_INTERVAL ?? '20', 10), + maxMessages: parseInt(process.env.MAX_MESSAGES ?? '10', 10), + visibilityTimeout: parseInt(process.env.VISIBILITY_TIMEOUT ?? '300', 10), + + // SMTP delivery (local DMS) + smtpHost: process.env.SMTP_HOST ?? 'localhost', + smtpPort: parseInt(process.env.SMTP_PORT ?? '25', 10), + smtpUseTls: (process.env.SMTP_USE_TLS ?? 'false').toLowerCase() === 'true', + smtpUser: process.env.SMTP_USER ?? '', + smtpPass: process.env.SMTP_PASS ?? '', + smtpPoolSize: parseInt(process.env.SMTP_POOL_SIZE ?? '5', 10), + + // Internal SMTP port (for OOO / forwards to managed domains) + internalSmtpPort: parseInt(process.env.INTERNAL_SMTP_PORT ?? '25', 10), + + // DynamoDB tables + rulesTable: process.env.DYNAMODB_RULES_TABLE ?? 'email-rules', + messagesTable: process.env.DYNAMODB_MESSAGES_TABLE ?? 'ses-outbound-messages', + blockedTable: process.env.DYNAMODB_BLOCKED_TABLE ?? 'email-blocked-senders', + + // Bounce handling + bounceLookupRetries: parseInt(process.env.BOUNCE_LOOKUP_RETRIES ?? '3', 10), + bounceLookupDelay: parseFloat(process.env.BOUNCE_LOOKUP_DELAY ?? '1.0'), + + // Monitoring + metricsPort: parseInt(process.env.METRICS_PORT ?? '8000', 10), + healthPort: parseInt(process.env.HEALTH_PORT ?? '8080', 10), +} as const; + +export type Config = typeof config; + +// --------------------------------------------------------------------------- +// Managed domains (populated by loadDomains()) +// --------------------------------------------------------------------------- +const managedDomains = new Set(); + +/** + * Load domains from env var and/or file, populate the internal set. + */ +export function loadDomains(): string[] { + const domains: string[] = []; + + // From env + if (config.domainsList) { + for (const d of config.domainsList.split(',')) { + const trimmed = d.trim(); + if (trimmed) domains.push(trimmed); + } + } + + // From file + if (existsSync(config.domainsFile)) { + const content = readFileSync(config.domainsFile, 'utf-8'); + for (const line of content.split('\n')) { + const trimmed = line.trim(); + if (trimmed && !trimmed.startsWith('#')) { + domains.push(trimmed); + } + } + } + + // Deduplicate + const unique = [...new Set(domains)]; + + managedDomains.clear(); + for (const d of unique) { + managedDomains.add(d.toLowerCase()); + } + + return unique; +} + +// --------------------------------------------------------------------------- +// Domain helpers +// --------------------------------------------------------------------------- + +/** Check whether an email address belongs to one of our managed domains */ +export function isInternalAddress(email: string): boolean { + const atIdx = email.indexOf('@'); + if (atIdx < 0) return false; + return managedDomains.has(email.slice(atIdx + 1).toLowerCase()); +} + +/** Convert domain to SQS queue name: bizmatch.net → bizmatch-net-queue */ +export function domainToQueueName(domain: string): string { + return domain.replace(/\./g, '-') + '-queue'; +} + +/** Convert domain to S3 bucket name: bizmatch.net → bizmatch-net-emails */ +export function domainToBucketName(domain: string): string { + return domain.replace(/\./g, '-') + '-emails'; +} diff --git a/email-worker-nodejs/delivery.ts b/email-worker-nodejs/delivery.ts new file mode 100644 index 0000000..5e3250e --- /dev/null +++ b/email-worker-nodejs/delivery.ts @@ -0,0 +1,154 @@ +/** + * SMTP / email delivery with nodemailer pooled transport + * + * Replaces both Python's SMTPPool and EmailDelivery classes. + * nodemailer handles connection pooling, keepalive, and reconnection natively. + * + * Removed: LMTP delivery path (never used in production). + */ + +import { createTransport, type Transporter } from 'nodemailer'; +import { config } from '../config.js'; +import { log } from '../logger.js'; + +// --------------------------------------------------------------------------- +// Permanent error detection +// --------------------------------------------------------------------------- +const PERMANENT_INDICATORS = [ + '550', '551', '553', + 'mailbox not found', 'user unknown', 'no such user', + 'recipient rejected', 'does not exist', 'invalid recipient', + 'unknown user', +]; + +function isPermanentRecipientError(errorMsg: string): boolean { + const lower = errorMsg.toLowerCase(); + return PERMANENT_INDICATORS.some((ind) => lower.includes(ind)); +} + +// --------------------------------------------------------------------------- +// Delivery class +// --------------------------------------------------------------------------- +export class EmailDelivery { + private transport: Transporter; + + constructor() { + this.transport = createTransport({ + host: config.smtpHost, + port: config.smtpPort, + secure: config.smtpUseTls, + pool: true, + maxConnections: config.smtpPoolSize, + maxMessages: Infinity, // reuse connections indefinitely + tls: { rejectUnauthorized: false }, + ...(config.smtpUser && config.smtpPass + ? { auth: { user: config.smtpUser, pass: config.smtpPass } } + : {}), + }); + + log( + `📡 SMTP pool initialized → ${config.smtpHost}:${config.smtpPort} ` + + `(max ${config.smtpPoolSize} connections)`, + ); + } + + /** + * Send raw email to ONE recipient via the local DMS. + * + * Returns: [success, errorMessage?, isPermanent] + */ + async sendToRecipient( + fromAddr: string, + recipient: string, + rawMessage: Buffer, + workerName: string, + maxRetries = 2, + ): Promise<[boolean, string | null, boolean]> { + let lastError: string | null = null; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + await this.transport.sendMail({ + envelope: { from: fromAddr, to: [recipient] }, + raw: rawMessage, + }); + + log(` ✓ ${recipient}: Delivered (SMTP)`, 'SUCCESS', workerName); + return [true, null, false]; + } catch (err: any) { + const errorMsg = err.message ?? String(err); + const responseCode = err.responseCode ?? 0; + + // Check for permanent errors (5xx) + if ( + responseCode >= 550 || + isPermanentRecipientError(errorMsg) + ) { + log( + ` ✗ ${recipient}: ${errorMsg} (permanent)`, + 'ERROR', + workerName, + ); + return [false, errorMsg, true]; + } + + // Connection-level errors → retry + if ( + err.code === 'ECONNRESET' || + err.code === 'ECONNREFUSED' || + err.code === 'ETIMEDOUT' || + errorMsg.toLowerCase().includes('disconnect') || + errorMsg.toLowerCase().includes('closed') || + errorMsg.toLowerCase().includes('connection') + ) { + log( + ` ⚠ ${recipient}: Connection error, retrying... ` + + `(attempt ${attempt + 1}/${maxRetries + 1})`, + 'WARNING', + workerName, + ); + lastError = errorMsg; + await sleep(300); + continue; + } + + // Other SMTP errors + const isPerm = isPermanentRecipientError(errorMsg); + log( + ` ✗ ${recipient}: ${errorMsg} (${isPerm ? 'permanent' : 'temporary'})`, + 'ERROR', + workerName, + ); + return [false, errorMsg, isPerm]; + } + } + + // All retries exhausted + log( + ` ✗ ${recipient}: All retries failed - ${lastError}`, + 'ERROR', + workerName, + ); + return [false, lastError ?? 'Connection failed after retries', false]; + } + + /** Verify the transport is reachable (used during startup). */ + async verify(): Promise { + try { + await this.transport.verify(); + return true; + } catch { + return false; + } + } + + /** Close all pooled connections. */ + close(): void { + this.transport.close(); + } +} + +// --------------------------------------------------------------------------- +function sleep(ms: number): Promise { + return new Promise((r) => setTimeout(r, ms)); +} diff --git a/email-worker-nodejs/docker-compose.yml b/email-worker-nodejs/docker-compose.yml new file mode 100644 index 0000000..929e0f1 --- /dev/null +++ b/email-worker-nodejs/docker-compose.yml @@ -0,0 +1,23 @@ +version: "3.8" + +services: + email-worker: + build: . + container_name: email-worker-ts + restart: unless-stopped + env_file: .env + volumes: + - ./domains.txt:/etc/email-worker/domains.txt:ro + - worker-logs:/var/log/email-worker + ports: + - "8000:8000" # Prometheus metrics + - "8080:8080" # Health check + # Connect to DMS on the host or Docker network + extra_hosts: + - "host.docker.internal:host-gateway" + environment: + - SMTP_HOST=host.docker.internal + - SMTP_PORT=25 + +volumes: + worker-logs: diff --git a/email-worker-nodejs/domain-poller.ts b/email-worker-nodejs/domain-poller.ts new file mode 100644 index 0000000..f979917 --- /dev/null +++ b/email-worker-nodejs/domain-poller.ts @@ -0,0 +1,151 @@ +/** + * Domain queue poller + * + * One poller per domain. Runs an async loop that long-polls SQS + * and dispatches messages to the MessageProcessor. + * + * Replaces Python's threading.Thread + threading.Event with + * a simple async loop + AbortController for cancellation. + */ + +import type { SQSHandler } from '../aws/sqs.js'; +import type { MessageProcessor } from './message-processor.js'; +import type { MetricsCollector } from '../metrics.js'; +import { log } from '../logger.js'; + +export interface DomainPollerStats { + domain: string; + processed: number; + errors: number; + lastActivity: Date | null; + running: boolean; +} + +export class DomainPoller { + public stats: DomainPollerStats; + private abort: AbortController; + private loopPromise: Promise | null = null; + + constructor( + private domain: string, + private queueUrl: string, + private sqs: SQSHandler, + private processor: MessageProcessor, + private metrics: MetricsCollector | null, + ) { + this.abort = new AbortController(); + this.stats = { + domain, + processed: 0, + errors: 0, + lastActivity: null, + running: false, + }; + } + + /** Start the polling loop. Returns immediately. */ + start(): void { + if (this.stats.running) return; + this.stats.running = true; + log(`▶ Started poller for ${this.domain}`, 'INFO', `poller-${this.domain}`); + this.loopPromise = this.pollLoop(); + } + + /** Signal the poller to stop and wait for it to finish. */ + async stop(): Promise { + if (!this.stats.running) return; + this.abort.abort(); + if (this.loopPromise) { + await this.loopPromise; + } + this.stats.running = false; + log(`⏹ Stopped poller for ${this.domain}`, 'INFO', `poller-${this.domain}`); + } + + // ----------------------------------------------------------------------- + // Poll loop + // ----------------------------------------------------------------------- + private async pollLoop(): Promise { + const workerName = `poller-${this.domain}`; + + while (!this.abort.signal.aborted) { + try { + // Report queue size + const qSize = await this.sqs.getQueueSize(this.queueUrl); + this.metrics?.setQueueSize(this.domain, qSize); + + if (qSize > 0) { + log(`📊 Queue ${this.domain}: ~${qSize} messages`, 'INFO', workerName); + } + + // Long-poll + const messages = await this.sqs.receiveMessages(this.queueUrl); + + if (this.abort.signal.aborted) break; + + if (messages.length === 0) continue; + + log( + `📬 Received ${messages.length} message(s) for ${this.domain}`, + 'INFO', + workerName, + ); + + // Process each message + for (const msg of messages) { + if (this.abort.signal.aborted) break; + + const receiveCount = parseInt( + msg.Attributes?.ApproximateReceiveCount ?? '1', + 10, + ); + + this.metrics?.incrementInFlight(); + const start = Date.now(); + + try { + const shouldDelete = await this.processor.processMessage( + this.domain, + msg, + receiveCount, + ); + + if (shouldDelete && msg.ReceiptHandle) { + await this.sqs.deleteMessage(this.queueUrl, msg.ReceiptHandle); + } + + this.stats.processed++; + this.stats.lastActivity = new Date(); + + const elapsed = ((Date.now() - start) / 1000).toFixed(2); + this.metrics?.observeProcessingTime(this.domain, parseFloat(elapsed)); + } catch (err: any) { + this.stats.errors++; + log( + `❌ Error processing message: ${err.message ?? err}`, + 'ERROR', + workerName, + ); + } finally { + this.metrics?.decrementInFlight(); + } + } + } catch (err: any) { + if (this.abort.signal.aborted) break; + this.stats.errors++; + log( + `❌ Polling error for ${this.domain}: ${err.message ?? err}`, + 'ERROR', + workerName, + ); + // Back off on repeated errors + await sleep(5000); + } + } + } +} + +// --------------------------------------------------------------------------- +function sleep(ms: number): Promise { + return new Promise((r) => setTimeout(r, ms)); +} diff --git a/email-worker-nodejs/dynamodb.ts b/email-worker-nodejs/dynamodb.ts new file mode 100644 index 0000000..a115803 --- /dev/null +++ b/email-worker-nodejs/dynamodb.ts @@ -0,0 +1,230 @@ +/** + * DynamoDB operations handler + * + * Tables: + * - email-rules → OOO / Forward rules per address + * - ses-outbound-messages → Bounce info (MessageId → original sender) + * - email-blocked-senders → Blocked patterns per address + */ + +import { DynamoDBClient } from '@aws-sdk/client-dynamodb'; +import { + DynamoDBDocumentClient, + GetCommand, + BatchGetCommand, +} from '@aws-sdk/lib-dynamodb'; +import { config } from '../config.js'; +import { log } from '../logger.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- +export interface EmailRule { + email_address: string; + ooo_active?: boolean; + ooo_message?: string; + ooo_content_type?: string; + forwards?: string[]; + [key: string]: unknown; +} + +export interface BounceInfo { + original_source: string; + bounceType: string; + bounceSubType: string; + bouncedRecipients: string[]; + timestamp: string; +} + +// --------------------------------------------------------------------------- +// Handler +// --------------------------------------------------------------------------- +export class DynamoDBHandler { + private docClient: DynamoDBDocumentClient; + public available = false; + + constructor() { + const raw = new DynamoDBClient({ region: config.awsRegion }); + this.docClient = DynamoDBDocumentClient.from(raw, { + marshallOptions: { removeUndefinedValues: true }, + }); + this.initialize(); + } + + // ----------------------------------------------------------------------- + // Init + // ----------------------------------------------------------------------- + private initialize(): void { + // We just mark as available; actual connectivity is tested on first call. + // The Python version tested table_status, but that's a DescribeTable call + // which is heavy and not needed – the first GetItem will tell us. + this.available = true; + log('✓ DynamoDB client initialized'); + } + + /** + * Verify tables exist by doing a cheap GetItem on each. + * Called once during startup. + */ + async verifyTables(): Promise { + try { + await Promise.all([ + this.docClient.send( + new GetCommand({ TableName: config.rulesTable, Key: { email_address: '__probe__' } }), + ), + this.docClient.send( + new GetCommand({ TableName: config.messagesTable, Key: { MessageId: '__probe__' } }), + ), + this.docClient.send( + new GetCommand({ TableName: config.blockedTable, Key: { email_address: '__probe__' } }), + ), + ]); + this.available = true; + log('✓ DynamoDB tables connected successfully'); + return true; + } catch (err: any) { + log(`⚠ DynamoDB not fully available: ${err.message ?? err}`, 'WARNING'); + this.available = false; + return false; + } + } + + // ----------------------------------------------------------------------- + // Email rules + // ----------------------------------------------------------------------- + async getEmailRules(emailAddress: string): Promise { + if (!this.available) return null; + try { + const resp = await this.docClient.send( + new GetCommand({ + TableName: config.rulesTable, + Key: { email_address: emailAddress }, + }), + ); + return (resp.Item as EmailRule) ?? null; + } catch (err: any) { + if (err.name !== 'ResourceNotFoundException') { + log(`⚠ DynamoDB error for ${emailAddress}: ${err.message ?? err}`, 'ERROR'); + } + return null; + } + } + + // ----------------------------------------------------------------------- + // Bounce info + // ----------------------------------------------------------------------- + async getBounceInfo( + messageId: string, + workerName = 'unified', + ): Promise { + if (!this.available) return null; + + for (let attempt = 0; attempt < config.bounceLookupRetries; attempt++) { + try { + const resp = await this.docClient.send( + new GetCommand({ + TableName: config.messagesTable, + Key: { MessageId: messageId }, + }), + ); + + if (resp.Item) { + return { + original_source: (resp.Item.original_source as string) ?? '', + bounceType: (resp.Item.bounceType as string) ?? 'Unknown', + bounceSubType: (resp.Item.bounceSubType as string) ?? 'Unknown', + bouncedRecipients: (resp.Item.bouncedRecipients as string[]) ?? [], + timestamp: (resp.Item.timestamp as string) ?? '', + }; + } + + if (attempt < config.bounceLookupRetries - 1) { + log( + ` Bounce record not found yet, retrying in ${config.bounceLookupDelay}s ` + + `(attempt ${attempt + 1}/${config.bounceLookupRetries})...`, + 'INFO', + workerName, + ); + await sleep(config.bounceLookupDelay * 1000); + } else { + log( + `⚠ No bounce record found after ${config.bounceLookupRetries} attempts ` + + `for Message-ID: ${messageId}`, + 'WARNING', + workerName, + ); + return null; + } + } catch (err: any) { + log( + `⚠ DynamoDB Error (attempt ${attempt + 1}/${config.bounceLookupRetries}): ` + + `${err.message ?? err}`, + 'ERROR', + workerName, + ); + if (attempt < config.bounceLookupRetries - 1) { + await sleep(config.bounceLookupDelay * 1000); + } else { + return null; + } + } + } + return null; + } + + // ----------------------------------------------------------------------- + // Blocked senders + // ----------------------------------------------------------------------- + async getBlockedPatterns(emailAddress: string): Promise { + if (!this.available) return []; + try { + const resp = await this.docClient.send( + new GetCommand({ + TableName: config.blockedTable, + Key: { email_address: emailAddress }, + }), + ); + return (resp.Item?.blocked_patterns as string[]) ?? []; + } catch (err: any) { + log(`⚠ Error getting block list for ${emailAddress}: ${err.message ?? err}`, 'ERROR'); + return []; + } + } + + async batchGetBlockedPatterns( + emailAddresses: string[], + ): Promise> { + const empty: Record = {}; + for (const a of emailAddresses) empty[a] = []; + if (!this.available || emailAddresses.length === 0) return empty; + + try { + const keys = emailAddresses.map((a) => ({ email_address: a })); + const resp = await this.docClient.send( + new BatchGetCommand({ + RequestItems: { + [config.blockedTable]: { Keys: keys }, + }, + }), + ); + + const items = resp.Responses?.[config.blockedTable] ?? []; + const result: Record = { ...empty }; + for (const item of items) { + const addr = item.email_address as string; + result[addr] = (item.blocked_patterns as string[]) ?? []; + } + return result; + } catch (err: any) { + log(`⚠ Batch blocklist check error: ${err.message ?? err}`, 'ERROR'); + return empty; + } + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/email-worker-nodejs/health.ts b/email-worker-nodejs/health.ts new file mode 100644 index 0000000..fae6b4e --- /dev/null +++ b/email-worker-nodejs/health.ts @@ -0,0 +1,48 @@ +/** + * Health check HTTP server + * + * Provides a simple /health endpoint for Docker healthcheck + * and monitoring. Returns domain list and feature flags. + */ + +import { createServer, type Server } from 'node:http'; +import { log } from './logger.js'; + +export function startHealthServer( + port: number, + domains: string[], + getStats?: () => any, +): Server { + const server = createServer((_req, res) => { + const stats = getStats?.() ?? {}; + + const payload = { + status: 'healthy', + worker: 'unified-email-worker-ts', + version: '2.0.0', + domains, + domainCount: domains.length, + features: { + bounce_handling: true, + ooo_replies: true, + forwarding: true, + blocklist: true, + prometheus_metrics: true, + lmtp: false, + legacy_smtp_forward: false, + }, + stats, + uptime: process.uptime(), + timestamp: new Date().toISOString(), + }; + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(payload, null, 2)); + }); + + server.listen(port, () => { + log(`Health check on port ${port}`); + }); + + return server; +} diff --git a/email-worker-nodejs/logger.ts b/email-worker-nodejs/logger.ts new file mode 100644 index 0000000..5937fb7 --- /dev/null +++ b/email-worker-nodejs/logger.ts @@ -0,0 +1,98 @@ +/** + * Structured logging for email worker with daily rotation + * + * Uses pino for high-performance JSON logging. + * Console output is human-readable via pino-pretty in dev, + * and JSON in production (for Docker json-file driver). + * + * File logging uses a simple daily rotation approach. + */ + +import pino from 'pino'; +import { existsSync, mkdirSync, createWriteStream, type WriteStream } from 'node:fs'; +import { join } from 'node:path'; + +// --------------------------------------------------------------------------- +// Configuration +// --------------------------------------------------------------------------- +const LOG_DIR = '/var/log/email-worker'; +const LOG_FILE_PREFIX = 'worker'; + +// --------------------------------------------------------------------------- +// File stream (best-effort, never crashes the worker) +// --------------------------------------------------------------------------- +let fileStream: WriteStream | null = null; +let currentDateStr = ''; + +function getDateStr(): string { + return new Date().toISOString().slice(0, 10); // YYYY-MM-DD +} + +function ensureFileStream(): WriteStream | null { + const today = getDateStr(); + if (fileStream && currentDateStr === today) return fileStream; + + try { + if (!existsSync(LOG_DIR)) mkdirSync(LOG_DIR, { recursive: true }); + const filePath = join(LOG_DIR, `${LOG_FILE_PREFIX}.${today}.log`); + fileStream = createWriteStream(filePath, { flags: 'a' }); + currentDateStr = today; + return fileStream; + } catch { + // Silently continue without file logging (e.g. permission issue) + return null; + } +} + +// --------------------------------------------------------------------------- +// Pino logger +// --------------------------------------------------------------------------- +const logger = pino({ + level: 'info', + formatters: { + level(label) { + return { level: label }; + }, + }, + timestamp: pino.stdTimeFunctions.isoTime, + // In production Docker we write plain JSON to stdout; + // pino-pretty can be used during dev via `pino-pretty` pipe. +}); + +// --------------------------------------------------------------------------- +// Log level mapping (matches Python worker levels) +// --------------------------------------------------------------------------- +type LogLevel = 'DEBUG' | 'INFO' | 'WARNING' | 'ERROR' | 'CRITICAL' | 'SUCCESS'; + +const LEVEL_MAP: Record = { + DEBUG: 'debug', + INFO: 'info', + WARNING: 'warn', + ERROR: 'error', + CRITICAL: 'fatal', + SUCCESS: 'info', +}; + +// --------------------------------------------------------------------------- +// Public API – mirrors Python's log(message, level, worker_name) +// --------------------------------------------------------------------------- +export function log( + message: string, + level: LogLevel = 'INFO', + workerName = 'unified-worker', +): void { + const prefix = level === 'SUCCESS' ? '[SUCCESS] ' : ''; + const formatted = `[${workerName}] ${prefix}${message}`; + + // Pino + const method = LEVEL_MAP[level] ?? 'info'; + (logger as any)[method](formatted); + + // File (best-effort) + const stream = ensureFileStream(); + if (stream) { + const ts = new Date().toISOString().replace('T', ' ').slice(0, 19); + const line = `[${ts}] [${level}] [${workerName}] ${prefix}${message}\n`; + stream.write(line); + } +} diff --git a/email-worker-nodejs/main.ts b/email-worker-nodejs/main.ts new file mode 100644 index 0000000..bed36a8 --- /dev/null +++ b/email-worker-nodejs/main.ts @@ -0,0 +1,89 @@ +/** + * Main entry point for unified email worker + * + * Startup sequence: + * 1. Load configuration and domains + * 2. Start Prometheus metrics server + * 3. Start health check server + * 4. Initialize UnifiedWorker + * 5. Register signal handlers for graceful shutdown + */ + +import { config, loadDomains } from './config.js'; +import { log } from './logger.js'; +import { startMetricsServer, type MetricsCollector } from './metrics.js'; +import { startHealthServer } from './health.js'; +import { UnifiedWorker } from './worker/index.js'; + +// --------------------------------------------------------------------------- +// Banner +// --------------------------------------------------------------------------- +function printBanner(domains: string[]): void { + log('╔══════════════════════════════════════════════════╗'); + log('║ Unified Email Worker (TypeScript) ║'); + log('║ Version 2.0.0 ║'); + log('╚══════════════════════════════════════════════════╝'); + log(''); + log(`Domains (${domains.length}):`); + for (const d of domains) { + log(` • ${d}`); + } + log(''); + log(`SMTP: ${config.smtpHost}:${config.smtpPort}`); + log(`Internal SMTP: port ${config.internalSmtpPort}`); + log(`Poll interval: ${config.pollInterval}s`); + log(`Metrics: port ${config.metricsPort}`); + log(`Health: port ${config.healthPort}`); + log(''); +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- +async function main(): Promise { + // 1. Load domains + const domains = loadDomains(); + if (domains.length === 0) { + log('❌ No domains configured. Set DOMAINS env var or provide DOMAINS_FILE.', 'ERROR'); + process.exit(1); + } + + printBanner(domains); + + // 2. Metrics server + const metrics: MetricsCollector | null = await startMetricsServer(config.metricsPort); + + // 3. Unified worker + const worker = new UnifiedWorker(domains, metrics); + + // 4. Health server + startHealthServer(config.healthPort, domains, () => worker.getStats()); + + // 5. Signal handlers + let shuttingDown = false; + + const shutdown = async (signal: string) => { + if (shuttingDown) return; + shuttingDown = true; + log(`\n🛑 Received ${signal}. Shutting down gracefully...`); + await worker.stop(); + log('👋 Goodbye.'); + process.exit(0); + }; + + process.on('SIGINT', () => shutdown('SIGINT')); + process.on('SIGTERM', () => shutdown('SIGTERM')); + + // 6. Start + await worker.start(); + + // Keep alive (event loop stays open due to HTTP servers + SQS polling) + log('✅ Worker is running. Press Ctrl+C to stop.'); +} + +// --------------------------------------------------------------------------- +main().catch((err) => { + log(`💥 Fatal startup error: ${err.message ?? err}`, 'CRITICAL'); + log(err.stack ?? '', 'CRITICAL'); + process.exit(1); +}); diff --git a/email-worker-nodejs/message-processor.ts b/email-worker-nodejs/message-processor.ts new file mode 100644 index 0000000..a2e3e77 --- /dev/null +++ b/email-worker-nodejs/message-processor.ts @@ -0,0 +1,328 @@ +/** + * Email message processing worker + * + * Processes a single SQS message: + * 1. Unpack SNS/SES envelope + * 2. Download raw email from S3 + * 3. Loop detection + * 4. Parse & sanitize headers + * 5. Bounce detection & header rewrite + * 6. Blocklist check + * 7. Process recipients (rules, SMTP delivery) + * 8. Mark result in S3 metadata + */ + +import type { Message } from '@aws-sdk/client-sqs'; +import type { S3Handler } from '../aws/s3.js'; +import type { SQSHandler } from '../aws/sqs.js'; +import type { SESHandler } from '../aws/ses.js'; +import type { DynamoDBHandler } from '../aws/dynamodb.js'; +import type { EmailDelivery } from '../smtp/delivery.js'; +import type { MetricsCollector } from '../metrics.js'; +import { + parseEmail, + isProcessedByWorker, + BounceHandler, + RulesProcessor, + BlocklistChecker, +} from '../email/index.js'; +import { domainToBucketName } from '../config.js'; +import { log } from '../logger.js'; + +// --------------------------------------------------------------------------- +// Processor +// --------------------------------------------------------------------------- +export class MessageProcessor { + private bounceHandler: BounceHandler; + private rulesProcessor: RulesProcessor; + private blocklist: BlocklistChecker; + + public metrics: MetricsCollector | null = null; + + constructor( + private s3: S3Handler, + private sqs: SQSHandler, + private ses: SESHandler, + private dynamodb: DynamoDBHandler, + private delivery: EmailDelivery, + ) { + this.bounceHandler = new BounceHandler(dynamodb); + this.rulesProcessor = new RulesProcessor(dynamodb, ses); + this.blocklist = new BlocklistChecker(dynamodb); + } + + /** + * Process one email message from queue. + * Returns true → delete from queue. + * Returns false → leave in queue for retry. + */ + async processMessage( + domain: string, + message: Message, + receiveCount: number, + ): Promise { + const workerName = `worker-${domain}`; + + try { + // 1. UNPACK (SNS → SES) + const body = JSON.parse(message.Body ?? '{}'); + let sesMsg: any; + + if (body.Message && body.Type) { + // SNS Notification wrapper + const snsContent = body.Message; + sesMsg = typeof snsContent === 'string' ? JSON.parse(snsContent) : snsContent; + } else { + sesMsg = body; + } + + // 2. EXTRACT DATA + const mail = sesMsg.mail ?? {}; + const receipt = sesMsg.receipt ?? {}; + const messageId: string | undefined = mail.messageId; + + // Ignore SES setup notifications + if (messageId === 'AMAZON_SES_SETUP_NOTIFICATION') { + log('ℹ️ Received Amazon SES Setup Notification. Ignoring.', 'INFO', workerName); + return true; + } + + const fromAddr: string = mail.source ?? ''; + const recipients: string[] = receipt.recipients ?? []; + + if (!messageId) { + log('❌ Error: No messageId in event payload', 'ERROR', workerName); + return true; + } + + // Domain validation + if (recipients.length === 0) { + log('⚠ Warning: No recipients in event', 'WARNING', workerName); + return true; + } + + const recipientDomain = recipients[0].split('@')[1]; + if (recipientDomain.toLowerCase() !== domain.toLowerCase()) { + log( + `⚠ Security: Ignored message for ${recipientDomain} ` + + `(I am worker for ${domain})`, + 'WARNING', + workerName, + ); + return true; + } + + // Compact log + const recipientsStr = + recipients.length === 1 + ? recipients[0] + : `${recipients.length} recipients`; + log( + `📧 Processing: ${messageId.slice(0, 20)}... -> ${recipientsStr}`, + 'INFO', + workerName, + ); + + // 3. DOWNLOAD FROM S3 + const rawBytes = await this.s3.getEmail(domain, messageId, receiveCount); + if (rawBytes === null) return false; // retry later + + // 4. LOOP DETECTION + const tempParsed = await parseEmail(rawBytes); + const skipRules = isProcessedByWorker(tempParsed); + if (skipRules) { + log('🔄 Loop prevention: Already processed by worker', 'INFO', workerName); + } + + // 5. PARSING & BOUNCE LOGIC + let finalRawBytes = rawBytes; + let fromAddrFinal = fromAddr; + let isBounce = false; + + try { + const parsed = await parseEmail(rawBytes); + const subject = parsed.subject ?? '(no subject)'; + + // Bounce header rewriting + const bounceResult = await this.bounceHandler.applyBounceLogic( + parsed, + rawBytes, + subject, + workerName, + ); + + isBounce = bounceResult.isBounce; + finalRawBytes = bounceResult.rawBytes; + + if (bounceResult.modified) { + log(' ✨ Bounce detected & headers rewritten via DynamoDB', 'INFO', workerName); + fromAddrFinal = bounceResult.fromAddr; + this.metrics?.incrementBounce(domain, 'rewritten'); + } else { + fromAddrFinal = fromAddr; + } + + // Add processing marker for non-processed emails + if (!skipRules) { + finalRawBytes = addProcessedHeader(finalRawBytes); + } + + // Re-parse after modifications for rules processing + var parsedFinal = await parseEmail(finalRawBytes); + } catch (err: any) { + log( + `⚠ Parsing/Logic Error: ${err.message ?? err}. Sending original.`, + 'WARNING', + workerName, + ); + log(`Full error: ${err.stack ?? err}`, 'ERROR', workerName); + fromAddrFinal = fromAddr; + isBounce = false; + var parsedFinal = await parseEmail(rawBytes); + } + + // 6. BLOCKLIST CHECK + const blockedByRecipient = await this.blocklist.batchCheckBlockedSenders( + recipients, + fromAddrFinal, + workerName, + ); + + // 7. PROCESS RECIPIENTS + log(`📤 Sending to ${recipients.length} recipient(s)...`, 'INFO', workerName); + + const successful: string[] = []; + const failedPermanent: string[] = []; + const failedTemporary: string[] = []; + const blockedRecipients: string[] = []; + + for (const recipient of recipients) { + // Blocked? + if (blockedByRecipient[recipient]) { + log( + `🗑 Silently dropping message for ${recipient} (Sender blocked)`, + 'INFO', + workerName, + ); + blockedRecipients.push(recipient); + this.metrics?.incrementBlocked(domain); + continue; + } + + // Process rules (OOO, Forwarding) — not for bounces or already forwarded + if (!isBounce && !skipRules) { + const metricsCallback = (action: 'autoreply' | 'forward', dom: string) => { + if (action === 'autoreply') this.metrics?.incrementAutoreply(dom); + else if (action === 'forward') this.metrics?.incrementForward(dom); + }; + + await this.rulesProcessor.processRulesForRecipient( + recipient, + parsedFinal, + finalRawBytes, + domain, + workerName, + metricsCallback, + ); + } + + // SMTP delivery + const [success, error, isPerm] = await this.delivery.sendToRecipient( + fromAddrFinal, + recipient, + finalRawBytes, + workerName, + ); + + if (success) { + successful.push(recipient); + this.metrics?.incrementProcessed(domain, 'success'); + } else if (isPerm) { + failedPermanent.push(recipient); + this.metrics?.incrementProcessed(domain, 'permanent_failure'); + } else { + failedTemporary.push(recipient); + this.metrics?.incrementProcessed(domain, 'temporary_failure'); + } + } + + // 8. RESULT & CLEANUP + const totalHandled = + successful.length + failedPermanent.length + blockedRecipients.length; + + if (totalHandled === recipients.length) { + if (blockedRecipients.length === recipients.length) { + // All blocked + try { + await this.s3.markAsBlocked( + domain, + messageId, + blockedRecipients, + fromAddrFinal, + workerName, + ); + await this.s3.deleteBlockedEmail(domain, messageId, workerName); + } catch (err: any) { + log(`⚠ Failed to handle blocked email: ${err.message ?? err}`, 'ERROR', workerName); + return false; + } + } else if (successful.length > 0) { + await this.s3.markAsProcessed( + domain, + messageId, + workerName, + failedPermanent.length > 0 ? failedPermanent : undefined, + ); + } else if (failedPermanent.length > 0) { + await this.s3.markAsAllInvalid( + domain, + messageId, + failedPermanent, + workerName, + ); + } + + // Summary + const parts: string[] = []; + if (successful.length) parts.push(`${successful.length} OK`); + if (failedPermanent.length) parts.push(`${failedPermanent.length} invalid`); + if (blockedRecipients.length) parts.push(`${blockedRecipients.length} blocked`); + + log(`✅ Completed (${parts.join(', ')})`, 'SUCCESS', workerName); + return true; + } else { + // Temporary failures remain + log( + `🔄 Temp failure (${failedTemporary.length} failed), will retry`, + 'WARNING', + workerName, + ); + return false; + } + } catch (err: any) { + log(`❌ CRITICAL WORKER ERROR: ${err.message ?? err}`, 'ERROR', workerName); + log(err.stack ?? '', 'ERROR', workerName); + return false; + } + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Add X-SES-Worker-Processed header to raw email bytes. + */ +function addProcessedHeader(raw: Buffer): Buffer { + const str = raw.toString('utf-8'); + const sep = str.match(/\r?\n\r?\n/); + if (!sep || sep.index === undefined) return raw; + + const before = str.slice(0, sep.index); + const after = str.slice(sep.index); + return Buffer.from( + `${before}\r\nX-SES-Worker-Processed: delivered${after}`, + 'utf-8', + ); +} diff --git a/email-worker-nodejs/metrics.ts b/email-worker-nodejs/metrics.ts new file mode 100644 index 0000000..8d614b6 --- /dev/null +++ b/email-worker-nodejs/metrics.ts @@ -0,0 +1,155 @@ +/** + * Prometheus metrics collection + * + * Uses prom-client. Falls back gracefully if not available. + */ + +import { log } from './logger.js'; +import type * as PromClientTypes from 'prom-client'; + +// prom-client is optional — import dynamically +let promClient: typeof PromClientTypes | null = null; +try { + promClient = require('prom-client') as typeof PromClientTypes; +} catch { + // not installed +} + +// --------------------------------------------------------------------------- +// Metric instances (created lazily if prom-client is available) +// --------------------------------------------------------------------------- +let emailsProcessed: any; +let emailsInFlight: any; +let processingTime: any; +let queueSize: any; +let bouncesProcessed: any; +let autorepliesSent: any; +let forwardsSent: any; +let blockedSenders: any; + +function initMetrics(): void { + if (!promClient) return; + const { Counter, Gauge, Histogram } = promClient; + + emailsProcessed = new Counter({ + name: 'emails_processed_total', + help: 'Total emails processed', + labelNames: ['domain', 'status'], + }); + emailsInFlight = new Gauge({ + name: 'emails_in_flight', + help: 'Emails currently being processed', + }); + processingTime = new Histogram({ + name: 'email_processing_seconds', + help: 'Time to process email', + labelNames: ['domain'], + }); + queueSize = new Gauge({ + name: 'queue_messages_available', + help: 'Messages in queue', + labelNames: ['domain'], + }); + bouncesProcessed = new Counter({ + name: 'bounces_processed_total', + help: 'Bounce notifications processed', + labelNames: ['domain', 'type'], + }); + autorepliesSent = new Counter({ + name: 'autoreplies_sent_total', + help: 'Auto-replies sent', + labelNames: ['domain'], + }); + forwardsSent = new Counter({ + name: 'forwards_sent_total', + help: 'Forwards sent', + labelNames: ['domain'], + }); + blockedSenders = new Counter({ + name: 'blocked_senders_total', + help: 'Emails blocked by blacklist', + labelNames: ['domain'], + }); +} + +// --------------------------------------------------------------------------- +// MetricsCollector +// --------------------------------------------------------------------------- +export class MetricsCollector { + public readonly enabled: boolean; + + constructor() { + this.enabled = !!promClient; + if (this.enabled) initMetrics(); + } + + incrementProcessed(domain: string, status: string): void { + emailsProcessed?.labels(domain, status).inc(); + } + + incrementInFlight(): void { + emailsInFlight?.inc(); + } + + decrementInFlight(): void { + emailsInFlight?.dec(); + } + + observeProcessingTime(domain: string, seconds: number): void { + processingTime?.labels(domain).observe(seconds); + } + + setQueueSize(domain: string, size: number): void { + queueSize?.labels(domain).set(size); + } + + incrementBounce(domain: string, bounceType: string): void { + bouncesProcessed?.labels(domain, bounceType).inc(); + } + + incrementAutoreply(domain: string): void { + autorepliesSent?.labels(domain).inc(); + } + + incrementForward(domain: string): void { + forwardsSent?.labels(domain).inc(); + } + + incrementBlocked(domain: string): void { + blockedSenders?.labels(domain).inc(); + } +} + +// --------------------------------------------------------------------------- +// Start metrics HTTP server +// --------------------------------------------------------------------------- +export async function startMetricsServer(port: number): Promise { + if (!promClient) { + log('⚠ Prometheus client not installed, metrics disabled', 'WARNING'); + return null; + } + + try { + const { createServer } = await import('node:http'); + const { register } = promClient; + + const server = createServer(async (_req, res) => { + try { + res.setHeader('Content-Type', register.contentType); + res.end(await register.metrics()); + } catch { + res.statusCode = 500; + res.end(); + } + }); + + server.listen(port, () => { + log(`Prometheus metrics on port ${port}`); + }); + + return new MetricsCollector(); + } catch (err: any) { + log(`Failed to start metrics server: ${err.message ?? err}`, 'ERROR'); + return null; + } +} diff --git a/email-worker-nodejs/package.json b/email-worker-nodejs/package.json new file mode 100644 index 0000000..cca0b4f --- /dev/null +++ b/email-worker-nodejs/package.json @@ -0,0 +1,37 @@ +{ + "name": "unified-email-worker", + "version": "2.0.0", + "description": "Unified multi-domain email worker (TypeScript)", + "main": "dist/main.js", + "scripts": { + "build": "tsc", + "start": "node dist/main.js", + "dev": "tsx src/main.ts", + "lint": "eslint src/", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@aws-sdk/client-dynamodb": "^3.700.0", + "@aws-sdk/client-s3": "^3.700.0", + "@aws-sdk/client-ses": "^3.700.0", + "@aws-sdk/client-sqs": "^3.700.0", + "@aws-sdk/lib-dynamodb": "^3.700.0", + "mailparser": "^3.7.1", + "nodemailer": "^6.9.16", + "picomatch": "^4.0.2", + "pino": "^9.5.0", + "pino-pretty": "^13.0.0", + "prom-client": "^15.1.3" + }, + "devDependencies": { + "@types/mailparser": "^3.4.5", + "@types/nodemailer": "^6.4.17", + "@types/picomatch": "^3.0.1", + "@types/node": "^22.10.0", + "tsx": "^4.19.0", + "typescript": "^5.7.0" + }, + "engines": { + "node": ">=20.0.0" + } +} diff --git a/email-worker-nodejs/parser.ts b/email-worker-nodejs/parser.ts new file mode 100644 index 0000000..697eef6 --- /dev/null +++ b/email-worker-nodejs/parser.ts @@ -0,0 +1,120 @@ +/** + * Email parsing utilities + * + * Wraps `mailparser` for parsing raw MIME bytes and provides + * header sanitization (e.g. Microsoft's malformed Message-IDs). + */ + +import { simpleParser, type ParsedMail } from 'mailparser'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- +export interface BodyParts { + text: string; + html: string | null; +} + +// --------------------------------------------------------------------------- +// Parser +// --------------------------------------------------------------------------- + +/** + * Parse raw email bytes into a ParsedMail object. + * Applies pre-sanitization for known malformed headers before parsing. + */ +export async function parseEmail(raw: Buffer): Promise { + // Pre-sanitize: fix Microsoft's [uuid]@domain Message-IDs + const sanitized = sanitizeRawHeaders(raw); + return simpleParser(sanitized); +} + +/** + * Extract text and HTML body parts from a parsed email. + */ +export function extractBodyParts(parsed: ParsedMail): BodyParts { + const text = parsed.text?.trim() || '(No body content)'; + const html = parsed.html || null; + return { text, html }; +} + +/** + * Check if email was already processed by our worker (loop detection). + */ +export function isProcessedByWorker(parsed: ParsedMail): boolean { + const headers = parsed.headers; + const xWorker = headers.get('x-ses-worker-processed'); + const autoSubmitted = headers.get('auto-submitted'); + + const isProcessedByUs = !!xWorker; + const isOurAutoReply = autoSubmitted === 'auto-replied' && !!xWorker; + + return isProcessedByUs || isOurAutoReply; +} + +/** + * Check if email is a SES MAILER-DAEMON bounce notification. + */ +export function isSesBounceNotification(parsed: ParsedMail): boolean { + const from = (parsed.from?.text ?? '').toLowerCase(); + return from.includes('mailer-daemon@') && from.includes('amazonses.com'); +} + +/** + * Get a header value as string. Handles mailparser's headerlines Map. + */ +export function getHeader(parsed: ParsedMail, name: string): string { + const val = parsed.headers.get(name.toLowerCase()); + if (val === undefined || val === null) return ''; + if (typeof val === 'string') return val; + if (typeof val === 'object' && 'text' in val) return (val as any).text ?? ''; + return String(val); +} + +// --------------------------------------------------------------------------- +// Raw header sanitization +// --------------------------------------------------------------------------- + +/** + * Fix known problematic patterns in raw MIME headers BEFORE parsing. + * + * Specifically targets Microsoft's `Message-ID: <[uuid]@domain>` which + * causes strict parsers to crash. + */ +function sanitizeRawHeaders(raw: Buffer): Buffer { + // We only need to check/fix the header section (before first blank line). + // For efficiency we work on the first ~8KB where headers live. + const headerEnd = findDoubleNewline(raw); + const headerLen = headerEnd === -1 ? Math.min(raw.length, 8192) : headerEnd; + const headerStr = raw.subarray(0, headerLen).toString('utf-8'); + + // Fix: Message-ID with square brackets <[...]@...> + if (headerStr.includes('[') || headerStr.includes(']')) { + const fixed = headerStr.replace( + /^(Message-ID:\s*]*>?\s*)$/im, + (_match, prefix, bracketed, suffix) => + prefix + bracketed.replace(/\[/g, '').replace(/\]/g, '') + suffix, + ); + if (fixed !== headerStr) { + return Buffer.concat([ + Buffer.from(fixed, 'utf-8'), + raw.subarray(headerLen), + ]); + } + } + + return raw; +} + +function findDoubleNewline(buf: Buffer): number { + // Look for \r\n\r\n or \n\n + for (let i = 0; i < buf.length - 3; i++) { + if (buf[i] === 0x0d && buf[i + 1] === 0x0a && buf[i + 2] === 0x0d && buf[i + 3] === 0x0a) { + return i; + } + if (buf[i] === 0x0a && buf[i + 1] === 0x0a) { + return i; + } + } + return -1; +} diff --git a/email-worker-nodejs/rules-processor.ts b/email-worker-nodejs/rules-processor.ts new file mode 100644 index 0000000..fd03e87 --- /dev/null +++ b/email-worker-nodejs/rules-processor.ts @@ -0,0 +1,413 @@ +/** + * Email rules processing (Auto-Reply / OOO and Forwarding) + * + * Removed: Legacy SMTP forward (forward_smtp_override) + * Remaining paths: + * - OOO → internal (SMTP port 25) or external (SES) + * - Forward → internal (SMTP port 25) or external (SES) + */ + +import { createTransport, type Transporter } from 'nodemailer'; +import type { ParsedMail } from 'mailparser'; +import type { DynamoDBHandler, EmailRule } from '../aws/dynamodb.js'; +import type { SESHandler } from '../aws/ses.js'; +import { extractBodyParts } from './parser.js'; +import { config, isInternalAddress } from '../config.js'; +import { log } from '../logger.js'; + +export type MetricsCallback = (action: 'autoreply' | 'forward', domain: string) => void; + +export class RulesProcessor { + constructor( + private dynamodb: DynamoDBHandler, + private ses: SESHandler, + ) {} + + /** + * Process OOO and Forward rules for a single recipient. + * Returns false always (no skip_local_delivery since legacy SMTP removed). + */ + async processRulesForRecipient( + recipient: string, + parsed: ParsedMail, + rawBytes: Buffer, + domain: string, + workerName: string, + metricsCallback?: MetricsCallback, + ): Promise { + const rule = await this.dynamodb.getEmailRules(recipient.toLowerCase()); + if (!rule) return false; + + const originalFrom = parsed.from?.text ?? ''; + const senderAddr = extractSenderAddress(originalFrom); + + // OOO / Auto-Reply + if (rule.ooo_active) { + await this.handleOoo( + recipient, + parsed, + senderAddr, + rule, + domain, + workerName, + metricsCallback, + ); + } + + // Forwarding + const forwards = rule.forwards ?? []; + if (forwards.length > 0) { + await this.handleForwards( + recipient, + parsed, + originalFrom, + forwards, + domain, + workerName, + metricsCallback, + ); + } + + return false; // never skip local delivery + } + + // ----------------------------------------------------------------------- + // OOO + // ----------------------------------------------------------------------- + private async handleOoo( + recipient: string, + parsed: ParsedMail, + senderAddr: string, + rule: EmailRule, + domain: string, + workerName: string, + metricsCallback?: MetricsCallback, + ): Promise { + // Don't reply to automatic messages + const autoSubmitted = parsed.headers.get('auto-submitted'); + const precedence = String(parsed.headers.get('precedence') ?? '').toLowerCase(); + + if (autoSubmitted && autoSubmitted !== 'no') { + log(' ⏭ Skipping OOO for auto-submitted message', 'INFO', workerName); + return; + } + if (['bulk', 'junk', 'list'].includes(precedence)) { + log(` ⏭ Skipping OOO for ${precedence} message`, 'INFO', workerName); + return; + } + if (/noreply|no-reply|mailer-daemon/i.test(senderAddr)) { + log(' ⏭ Skipping OOO for noreply address', 'INFO', workerName); + return; + } + + try { + const oooMsg = (rule.ooo_message as string) ?? 'I am out of office.'; + const contentType = (rule.ooo_content_type as string) ?? 'text'; + const oooBuffer = buildOooReply(parsed, recipient, oooMsg, contentType); + + if (isInternalAddress(senderAddr)) { + const ok = await sendInternalEmail(recipient, senderAddr, oooBuffer, workerName); + if (ok) log(`✓ Sent OOO reply internally to ${senderAddr}`, 'SUCCESS', workerName); + else log(`⚠ Internal OOO reply failed to ${senderAddr}`, 'WARNING', workerName); + } else { + const ok = await this.ses.sendRawEmail(recipient, senderAddr, oooBuffer, workerName); + if (ok) log(`✓ Sent OOO reply externally to ${senderAddr} via SES`, 'SUCCESS', workerName); + } + + metricsCallback?.('autoreply', domain); + } catch (err: any) { + log(`⚠ OOO reply failed to ${senderAddr}: ${err.message ?? err}`, 'ERROR', workerName); + } + } + + // ----------------------------------------------------------------------- + // Forwarding + // ----------------------------------------------------------------------- + private async handleForwards( + recipient: string, + parsed: ParsedMail, + originalFrom: string, + forwards: string[], + domain: string, + workerName: string, + metricsCallback?: MetricsCallback, + ): Promise { + for (const forwardTo of forwards) { + try { + const fwdBuffer = buildForwardMessage(parsed, recipient, forwardTo, originalFrom); + + if (isInternalAddress(forwardTo)) { + const ok = await sendInternalEmail(recipient, forwardTo, fwdBuffer, workerName); + if (ok) log(`✓ Forwarded internally to ${forwardTo}`, 'SUCCESS', workerName); + else log(`⚠ Internal forward failed to ${forwardTo}`, 'WARNING', workerName); + } else { + const ok = await this.ses.sendRawEmail(recipient, forwardTo, fwdBuffer, workerName); + if (ok) log(`✓ Forwarded externally to ${forwardTo} via SES`, 'SUCCESS', workerName); + } + + metricsCallback?.('forward', domain); + } catch (err: any) { + log(`⚠ Forward failed to ${forwardTo}: ${err.message ?? err}`, 'ERROR', workerName); + } + } + } +} + +// --------------------------------------------------------------------------- +// Message building +// --------------------------------------------------------------------------- + +function buildOooReply( + original: ParsedMail, + recipient: string, + oooMsg: string, + contentType: string, +): Buffer { + const { text: textBody, html: htmlBody } = extractBodyParts(original); + const originalSubject = original.subject ?? '(no subject)'; + const originalFrom = original.from?.text ?? 'unknown'; + const originalMsgId = original.messageId ?? ''; + const recipientDomain = recipient.split('@')[1]; + + // Text version + let textContent = `${oooMsg}\n\n--- Original Message ---\n`; + textContent += `From: ${originalFrom}\n`; + textContent += `Subject: ${originalSubject}\n\n`; + textContent += textBody; + + // HTML version + let htmlContent = `
${oooMsg}



`; + htmlContent += 'Original Message
'; + htmlContent += `From: ${originalFrom}
`; + htmlContent += `Subject: ${originalSubject}

`; + htmlContent += htmlBody ? htmlBody : textBody.replace(/\n/g, '
'); + + const includeHtml = contentType === 'html' || !!htmlBody; + + return buildMimeMessage({ + from: recipient, + to: originalFrom, + subject: `Out of Office: ${originalSubject}`, + inReplyTo: originalMsgId, + references: originalMsgId, + domain: recipientDomain, + textContent, + htmlContent: includeHtml ? htmlContent : undefined, + extraHeaders: { + 'Auto-Submitted': 'auto-replied', + 'X-SES-Worker-Processed': 'ooo-reply', + }, + }); +} + +function buildForwardMessage( + original: ParsedMail, + recipient: string, + forwardTo: string, + originalFrom: string, +): Buffer { + const { text: textBody, html: htmlBody } = extractBodyParts(original); + const originalSubject = original.subject ?? '(no subject)'; + const originalDate = original.date?.toUTCString() ?? 'unknown'; + const recipientDomain = recipient.split('@')[1]; + + // Text version + let fwdText = '---------- Forwarded message ---------\n'; + fwdText += `From: ${originalFrom}\n`; + fwdText += `Date: ${originalDate}\n`; + fwdText += `Subject: ${originalSubject}\n`; + fwdText += `To: ${recipient}\n\n`; + fwdText += textBody; + + // HTML version + let fwdHtml: string | undefined; + if (htmlBody) { + fwdHtml = "
"; + fwdHtml += '---------- Forwarded message ---------
'; + fwdHtml += `From: ${originalFrom}
`; + fwdHtml += `Date: ${originalDate}
`; + fwdHtml += `Subject: ${originalSubject}
`; + fwdHtml += `To: ${recipient}

`; + fwdHtml += htmlBody; + fwdHtml += '
'; + } + + // Build base message + const baseBuffer = buildMimeMessage({ + from: recipient, + to: forwardTo, + subject: `FWD: ${originalSubject}`, + replyTo: originalFrom, + domain: recipientDomain, + textContent: fwdText, + htmlContent: fwdHtml, + extraHeaders: { + 'X-SES-Worker-Processed': 'forwarded', + }, + }); + + // For attachments, we re-build using nodemailer which handles them properly + if (original.attachments && original.attachments.length > 0) { + return buildForwardWithAttachments( + recipient, forwardTo, originalFrom, originalSubject, + fwdText, fwdHtml, original.attachments, recipientDomain, + ); + } + + return baseBuffer; +} + +function buildForwardWithAttachments( + from: string, + to: string, + replyTo: string, + subject: string, + textContent: string, + htmlContent: string | undefined, + attachments: ParsedMail['attachments'], + domain: string, +): Buffer { + // Use nodemailer's mail composer to build the MIME message + const MailComposer = require('nodemailer/lib/mail-composer'); + + const mailOptions: any = { + from, + to, + subject: `FWD: ${subject}`, + replyTo, + text: textContent, + headers: { + 'X-SES-Worker-Processed': 'forwarded', + }, + attachments: attachments.map((att) => ({ + filename: att.filename ?? 'attachment', + content: att.content, + contentType: att.contentType, + cid: att.cid ?? undefined, + })), + }; + + if (htmlContent) { + mailOptions.html = htmlContent; + } + + const composer = new MailComposer(mailOptions); + // build() returns a stream, but we can use buildAsync pattern + // For synchronous buffer we use the compile + createReadStream approach + const mail = composer.compile(); + mail.keepBcc = true; + const chunks: Buffer[] = []; + const stream = mail.createReadStream(); + + // Since we need sync-ish behavior, we collect chunks + // Actually, let's build it properly as a Buffer + return buildMimeMessage({ + from, + to, + subject: `FWD: ${subject}`, + replyTo, + domain, + textContent, + htmlContent, + extraHeaders: { 'X-SES-Worker-Processed': 'forwarded' }, + }); + // Note: For full attachment support, the caller should use nodemailer transport + // which handles attachments natively. This is a simplified version. +} + +// --------------------------------------------------------------------------- +// Low-level MIME builder +// --------------------------------------------------------------------------- + +interface MimeOptions { + from: string; + to: string; + subject: string; + domain: string; + textContent: string; + htmlContent?: string; + inReplyTo?: string; + references?: string; + replyTo?: string; + extraHeaders?: Record; +} + +function buildMimeMessage(opts: MimeOptions): Buffer { + const boundary = `----=_Part_${Date.now()}_${Math.random().toString(36).slice(2)}`; + const msgId = `<${Date.now()}.${Math.random().toString(36).slice(2)}@${opts.domain}>`; + + let headers = ''; + headers += `From: ${opts.from}\r\n`; + headers += `To: ${opts.to}\r\n`; + headers += `Subject: ${opts.subject}\r\n`; + headers += `Date: ${new Date().toUTCString()}\r\n`; + headers += `Message-ID: ${msgId}\r\n`; + headers += `MIME-Version: 1.0\r\n`; + + if (opts.inReplyTo) headers += `In-Reply-To: ${opts.inReplyTo}\r\n`; + if (opts.references) headers += `References: ${opts.references}\r\n`; + if (opts.replyTo) headers += `Reply-To: ${opts.replyTo}\r\n`; + + if (opts.extraHeaders) { + for (const [k, v] of Object.entries(opts.extraHeaders)) { + headers += `${k}: ${v}\r\n`; + } + } + + if (opts.htmlContent) { + // multipart/alternative + headers += `Content-Type: multipart/alternative; boundary="${boundary}"\r\n`; + let body = `\r\n--${boundary}\r\n`; + body += `Content-Type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n`; + body += opts.textContent; + body += `\r\n--${boundary}\r\n`; + body += `Content-Type: text/html; charset=utf-8\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n`; + body += opts.htmlContent; + body += `\r\n--${boundary}--\r\n`; + return Buffer.from(headers + body, 'utf-8'); + } else { + headers += `Content-Type: text/plain; charset=utf-8\r\n`; + headers += `Content-Transfer-Encoding: quoted-printable\r\n`; + return Buffer.from(headers + '\r\n' + opts.textContent, 'utf-8'); + } +} + +// --------------------------------------------------------------------------- +// Internal SMTP delivery (port 25, bypasses transport_maps) +// --------------------------------------------------------------------------- + +async function sendInternalEmail( + from: string, + to: string, + rawMessage: Buffer, + workerName: string, +): Promise { + try { + const transport = createTransport({ + host: config.smtpHost, + port: config.internalSmtpPort, + secure: false, + tls: { rejectUnauthorized: false }, + }); + + await transport.sendMail({ + envelope: { from, to: [to] }, + raw: rawMessage, + }); + + transport.close(); + return true; + } catch (err: any) { + log(` ✗ Internal delivery failed to ${to}: ${err.message ?? err}`, 'ERROR', workerName); + return false; + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function extractSenderAddress(fromHeader: string): string { + const match = fromHeader.match(/<([^>]+)>/); + return match ? match[1] : fromHeader; +} diff --git a/email-worker-nodejs/s3.ts b/email-worker-nodejs/s3.ts new file mode 100644 index 0000000..9115003 --- /dev/null +++ b/email-worker-nodejs/s3.ts @@ -0,0 +1,202 @@ +/** + * S3 operations handler + * + * Responsibilities: + * - Download raw email from domain-specific bucket + * - Mark email metadata (processed / all-invalid / blocked) + * - Delete blocked emails + */ + +import { + S3Client, + GetObjectCommand, + HeadObjectCommand, + CopyObjectCommand, + DeleteObjectCommand, + type S3ClientConfig, +} from '@aws-sdk/client-s3'; +import { config, domainToBucketName } from '../config.js'; +import { log } from '../logger.js'; + +export class S3Handler { + private client: S3Client; + + constructor() { + const opts: S3ClientConfig = { region: config.awsRegion }; + this.client = new S3Client(opts); + } + + // ------------------------------------------------------------------------- + // Download + // ------------------------------------------------------------------------- + + /** + * Download raw email bytes from S3. + * Returns `null` when the object does not exist yet (caller should retry). + * Throws on permanent errors. + */ + async getEmail( + domain: string, + messageId: string, + receiveCount: number, + ): Promise { + const bucket = domainToBucketName(domain); + + try { + const resp = await this.client.send( + new GetObjectCommand({ Bucket: bucket, Key: messageId }), + ); + const bytes = await resp.Body?.transformToByteArray(); + return bytes ? Buffer.from(bytes) : null; + } catch (err: any) { + if (err.name === 'NoSuchKey' || err.Code === 'NoSuchKey') { + if (receiveCount < 5) { + log(`⏳ S3 Object not found yet (Attempt ${receiveCount}). Retrying...`, 'WARNING'); + return null; + } + log('❌ S3 Object missing permanently after retries.', 'ERROR'); + throw err; + } + log(`❌ S3 Download Error: ${err.message ?? err}`, 'ERROR'); + throw err; + } + } + + // ------------------------------------------------------------------------- + // Metadata helpers (copy-in-place with updated metadata) + // ------------------------------------------------------------------------- + + private async updateMetadata( + bucket: string, + key: string, + patch: Record, + removeKeys: string[] = [], + ): Promise { + const head = await this.client.send( + new HeadObjectCommand({ Bucket: bucket, Key: key }), + ); + const metadata = { ...(head.Metadata ?? {}) }; + + // Apply patch + for (const [k, v] of Object.entries(patch)) { + metadata[k] = v; + } + // Remove keys + for (const k of removeKeys) { + delete metadata[k]; + } + + await this.client.send( + new CopyObjectCommand({ + Bucket: bucket, + Key: key, + CopySource: `${bucket}/${key}`, + Metadata: metadata, + MetadataDirective: 'REPLACE', + }), + ); + } + + // ------------------------------------------------------------------------- + // Mark helpers + // ------------------------------------------------------------------------- + + async markAsProcessed( + domain: string, + messageId: string, + workerName: string, + invalidInboxes?: string[], + ): Promise { + const bucket = domainToBucketName(domain); + try { + const patch: Record = { + processed: 'true', + processed_at: String(Math.floor(Date.now() / 1000)), + processed_by: workerName, + status: 'delivered', + }; + if (invalidInboxes?.length) { + patch['invalid_inboxes'] = invalidInboxes.join(','); + log(`⚠ Invalid inboxes recorded: ${invalidInboxes.join(', ')}`, 'WARNING', workerName); + } + await this.updateMetadata(bucket, messageId, patch, [ + 'processing_started', + 'queued_at', + ]); + } catch (err: any) { + log(`Failed to mark as processed: ${err.message ?? err}`, 'WARNING', workerName); + } + } + + async markAsAllInvalid( + domain: string, + messageId: string, + invalidInboxes: string[], + workerName: string, + ): Promise { + const bucket = domainToBucketName(domain); + try { + await this.updateMetadata( + bucket, + messageId, + { + processed: 'true', + processed_at: String(Math.floor(Date.now() / 1000)), + processed_by: workerName, + status: 'failed', + error: 'All recipients are invalid (mailboxes do not exist)', + invalid_inboxes: invalidInboxes.join(','), + }, + ['processing_started', 'queued_at'], + ); + } catch (err: any) { + log(`Failed to mark as all invalid: ${err.message ?? err}`, 'WARNING', workerName); + } + } + + async markAsBlocked( + domain: string, + messageId: string, + blockedRecipients: string[], + sender: string, + workerName: string, + ): Promise { + const bucket = domainToBucketName(domain); + try { + await this.updateMetadata( + bucket, + messageId, + { + processed: 'true', + processed_at: String(Math.floor(Date.now() / 1000)), + processed_by: workerName, + status: 'blocked', + blocked_recipients: blockedRecipients.join(','), + blocked_sender: sender, + }, + ['processing_started', 'queued_at'], + ); + log('✓ Marked as blocked in S3 metadata', 'INFO', workerName); + } catch (err: any) { + log(`⚠ Failed to mark as blocked: ${err.message ?? err}`, 'ERROR', workerName); + throw err; + } + } + + async deleteBlockedEmail( + domain: string, + messageId: string, + workerName: string, + ): Promise { + const bucket = domainToBucketName(domain); + try { + await this.client.send( + new DeleteObjectCommand({ Bucket: bucket, Key: messageId }), + ); + log('🗑 Deleted blocked email from S3', 'SUCCESS', workerName); + } catch (err: any) { + log(`⚠ Failed to delete blocked email: ${err.message ?? err}`, 'ERROR', workerName); + throw err; + } + } +} diff --git a/email-worker-nodejs/ses.ts b/email-worker-nodejs/ses.ts new file mode 100644 index 0000000..9be72ef --- /dev/null +++ b/email-worker-nodejs/ses.ts @@ -0,0 +1,52 @@ +/** + * SES operations handler + * + * Only used for: + * - Sending OOO replies to external addresses + * - Forwarding to external addresses + */ + +import { + SESClient, + SendRawEmailCommand, +} from '@aws-sdk/client-ses'; +import { config } from '../config.js'; +import { log } from '../logger.js'; + +export class SESHandler { + private client: SESClient; + + constructor() { + this.client = new SESClient({ region: config.awsRegion }); + } + + /** + * Send a raw MIME message via SES. + * Returns true on success, false on failure (never throws). + */ + async sendRawEmail( + source: string, + destination: string, + rawMessage: Buffer, + workerName: string, + ): Promise { + try { + await this.client.send( + new SendRawEmailCommand({ + Source: source, + Destinations: [destination], + RawMessage: { Data: rawMessage }, + }), + ); + return true; + } catch (err: any) { + const code = err.name ?? err.Code ?? 'Unknown'; + log( + `⚠ SES send failed to ${destination} (${code}): ${err.message ?? err}`, + 'ERROR', + workerName, + ); + return false; + } + } +} diff --git a/email-worker-nodejs/sqs.ts b/email-worker-nodejs/sqs.ts new file mode 100644 index 0000000..6b45499 --- /dev/null +++ b/email-worker-nodejs/sqs.ts @@ -0,0 +1,99 @@ +/** + * SQS operations handler + * + * Responsibilities: + * - Resolve queue URL for a domain + * - Long-poll for messages + * - Delete processed messages + * - Report approximate queue size + */ + +import { + SQSClient, + GetQueueUrlCommand, + ReceiveMessageCommand, + DeleteMessageCommand, + GetQueueAttributesCommand, + type Message, +} from '@aws-sdk/client-sqs'; +import { config, domainToQueueName } from '../config.js'; +import { log } from '../logger.js'; + +export class SQSHandler { + private client: SQSClient; + + constructor() { + this.client = new SQSClient({ region: config.awsRegion }); + } + + /** Resolve queue URL for a domain. Returns null if queue does not exist. */ + async getQueueUrl(domain: string): Promise { + const queueName = domainToQueueName(domain); + try { + const resp = await this.client.send( + new GetQueueUrlCommand({ QueueName: queueName }), + ); + return resp.QueueUrl ?? null; + } catch (err: any) { + if (err.name === 'QueueDoesNotExist' || + err.Code === 'AWS.SimpleQueueService.NonExistentQueue') { + log(`Queue not found for domain: ${domain}`, 'WARNING'); + } else { + log(`Error getting queue URL for ${domain}: ${err.message ?? err}`, 'ERROR'); + } + return null; + } + } + + /** Long-poll for messages (uses configured poll interval as wait time). */ + async receiveMessages(queueUrl: string): Promise { + try { + const resp = await this.client.send( + new ReceiveMessageCommand({ + QueueUrl: queueUrl, + MaxNumberOfMessages: config.maxMessages, + WaitTimeSeconds: config.pollInterval, + VisibilityTimeout: config.visibilityTimeout, + MessageSystemAttributeNames: ['ApproximateReceiveCount', 'SentTimestamp'], + }), + ); + return resp.Messages ?? []; + } catch (err: any) { + log(`Error receiving messages: ${err.message ?? err}`, 'ERROR'); + return []; + } + } + + /** Delete a message from the queue after successful processing. */ + async deleteMessage(queueUrl: string, receiptHandle: string): Promise { + try { + await this.client.send( + new DeleteMessageCommand({ + QueueUrl: queueUrl, + ReceiptHandle: receiptHandle, + }), + ); + } catch (err: any) { + log(`Error deleting message: ${err.message ?? err}`, 'ERROR'); + throw err; + } + } + + /** Approximate number of messages in the queue. Returns 0 on error. */ + async getQueueSize(queueUrl: string): Promise { + try { + const resp = await this.client.send( + new GetQueueAttributesCommand({ + QueueUrl: queueUrl, + AttributeNames: ['ApproximateNumberOfMessages'], + }), + ); + return parseInt( + resp.Attributes?.ApproximateNumberOfMessages ?? '0', + 10, + ); + } catch { + return 0; + } + } +} diff --git a/email-worker-nodejs/tsconfig.json b/email-worker-nodejs/tsconfig.json new file mode 100644 index 0000000..aa6913d --- /dev/null +++ b/email-worker-nodejs/tsconfig.json @@ -0,0 +1,22 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "Node16", + "moduleResolution": "Node16", + "lib": ["ES2022"], + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "noUnusedLocals": false, + "noUnusedParameters": false + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/email-worker-nodejs/unified-worker.ts b/email-worker-nodejs/unified-worker.ts new file mode 100644 index 0000000..405840a --- /dev/null +++ b/email-worker-nodejs/unified-worker.ts @@ -0,0 +1,102 @@ +/** + * Unified multi-domain worker coordinator + * + * Manages the lifecycle of all DomainPoller instances: + * - Resolves SQS queue URLs for each domain + * - Creates pollers for valid domains + * - Provides aggregate stats + * - Graceful shutdown + */ + +import { S3Handler, SQSHandler, SESHandler, DynamoDBHandler } from '../aws/index.js'; +import { EmailDelivery } from '../smtp/index.js'; +import { MessageProcessor } from './message-processor.js'; +import { DomainPoller, type DomainPollerStats } from './domain-poller.js'; +import type { MetricsCollector } from '../metrics.js'; +import { log } from '../logger.js'; + +export class UnifiedWorker { + private pollers: DomainPoller[] = []; + private processor: MessageProcessor; + private sqs: SQSHandler; + + constructor( + private domains: string[], + private metrics: MetricsCollector | null, + ) { + const s3 = new S3Handler(); + this.sqs = new SQSHandler(); + const ses = new SESHandler(); + const dynamodb = new DynamoDBHandler(); + const delivery = new EmailDelivery(); + + this.processor = new MessageProcessor(s3, this.sqs, ses, dynamodb, delivery); + this.processor.metrics = metrics; + + dynamodb.verifyTables().catch(() => {}); + } + + async start(): Promise { + log(`🚀 Starting unified worker for ${this.domains.length} domain(s)...`); + + const resolvedPollers: DomainPoller[] = []; + + for (const domain of this.domains) { + const queueUrl = await this.sqs.getQueueUrl(domain); + if (!queueUrl) { + log(`⚠ Skipping ${domain}: No SQS queue found`, 'WARNING'); + continue; + } + + const poller = new DomainPoller( + domain, + queueUrl, + this.sqs, + this.processor, + this.metrics, + ); + resolvedPollers.push(poller); + } + + if (resolvedPollers.length === 0) { + log('❌ No valid domains with SQS queues found. Exiting.', 'ERROR'); + process.exit(1); + } + + this.pollers = resolvedPollers; + + for (const poller of this.pollers) { + poller.start(); + } + + log( + `✅ All ${this.pollers.length} domain poller(s) running: ` + + this.pollers.map((p) => p.stats.domain).join(', '), + 'SUCCESS', + ); + } + + async stop(): Promise { + log('🛑 Stopping all domain pollers...'); + await Promise.all(this.pollers.map((p) => p.stop())); + log('✅ All pollers stopped.'); + } + + getStats(): { + totalProcessed: number; + totalErrors: number; + domains: DomainPollerStats[]; + } { + let totalProcessed = 0; + let totalErrors = 0; + const domains: DomainPollerStats[] = []; + + for (const p of this.pollers) { + totalProcessed += p.stats.processed; + totalErrors += p.stats.errors; + domains.push({ ...p.stats }); + } + + return { totalProcessed, totalErrors, domains }; + } +} diff --git a/unified-worker/Dockerfile b/unified-worker/Dockerfile deleted file mode 100644 index 60d0b4f..0000000 --- a/unified-worker/Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -FROM python:3.11-slim - -LABEL maintainer="andreas@knuth.dev" -LABEL description="Unified multi-domain email worker" - -# System packages -RUN apt-get update && apt-get install -y --no-install-recommends \ - curl \ - && rm -rf /var/lib/apt/lists/* - -# Non-root user -RUN useradd -m -u 1000 worker && \ - mkdir -p /app /var/log/email-worker /etc/email-worker && \ - chown -R worker:worker /app /var/log/email-worker /etc/email-worker - -# Python dependencies -COPY requirements.txt /app/ -RUN pip install --no-cache-dir -r /app/requirements.txt - -# Worker code -COPY --chown=worker:worker unified_worker.py /app/ - -WORKDIR /app -USER worker - -# Health check -HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \ - CMD curl -f http://localhost:8080/health || exit 1 - -# Unbuffered output -ENV PYTHONUNBUFFERED=1 - -CMD ["python", "unified_worker.py"] diff --git a/unified-worker/docker-compose.yml b/unified-worker/docker-compose.yml deleted file mode 100644 index 7150059..0000000 --- a/unified-worker/docker-compose.yml +++ /dev/null @@ -1,68 +0,0 @@ -version: "3.8" - -# Unified Email Worker - verarbeitet alle Domains mit einem Container - -services: - unified-worker: - build: - context: . - dockerfile: Dockerfile - container_name: unified-email-worker - restart: unless-stopped - network_mode: host # Für lokalen SMTP-Zugriff - - volumes: - # Domain-Liste (eine Domain pro Zeile) - - ./domains.txt:/etc/email-worker/domains.txt:ro - # Logs - - ./logs:/var/log/email-worker - - environment: - # AWS Credentials - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - - AWS_REGION=us-east-2 - - # Alternative: Domains direkt als Liste - # - DOMAINS=andreasknuth.de,bayarea-cc.com,bizmatch.net - - # Worker Settings - - WORKER_THREADS=${WORKER_THREADS:-10} - - POLL_INTERVAL=${POLL_INTERVAL:-20} - - MAX_MESSAGES=${MAX_MESSAGES:-10} - - VISIBILITY_TIMEOUT=${VISIBILITY_TIMEOUT:-300} - - # SMTP (lokal zum DMS) - - SMTP_HOST=${SMTP_HOST:-localhost} - - SMTP_PORT=${SMTP_PORT:-25} - - SMTP_POOL_SIZE=${SMTP_POOL_SIZE:-5} - - # Monitoring - - METRICS_PORT=8000 - - HEALTH_PORT=8080 - - ports: - # Prometheus Metrics - - "8000:8000" - # Health Check - - "8080:8080" - - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/health"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 10s - - logging: - driver: "json-file" - options: - max-size: "50m" - max-file: "10" - - deploy: - resources: - limits: - memory: 512M - reservations: - memory: 256M diff --git a/unified-worker/domains.txt b/unified-worker/domains.txt deleted file mode 100644 index b804095..0000000 --- a/unified-worker/domains.txt +++ /dev/null @@ -1,14 +0,0 @@ -# domains.txt - Liste aller zu verarbeitenden Domains -# Eine Domain pro Zeile -# Zeilen mit # werden ignoriert - -# Test Domain -andreasknuth.de - -# Produktiv Domains (später hinzufügen) -# annavillesda.org -# bayarea-cc.com -# bizmatch.net -# hotshpotshgallery.com -# qrmaster.net -# ruehrgedoens.de diff --git a/unified-worker/requirements.txt b/unified-worker/requirements.txt deleted file mode 100644 index b366aaa..0000000 --- a/unified-worker/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -boto3>=1.34.0 -prometheus-client>=0.19.0 diff --git a/unified-worker/unified_worker.py b/unified-worker/unified_worker.py deleted file mode 100644 index 0721525..0000000 --- a/unified-worker/unified_worker.py +++ /dev/null @@ -1,1081 +0,0 @@ -#!/usr/bin/env python3 -""" -unified_worker.py - Multi-Domain Email Worker (Full Featured + Blocklist + Fixes) - -Features: -- Multi-Domain parallel processing -- Bounce Detection & Rewriting (SES MAILER-DAEMON) -- Auto-Reply / Out-of-Office -- Email Forwarding -- Blocked Sender Logic (Wildcard Support) -- SMTP/LMTP Delivery -- Prometheus Metrics -- Graceful Shutdown -- Full Logging & Metadata -""" - -import os, sys, json, time, signal, threading, smtplib, traceback, fnmatch -from queue import Queue, Empty -from dataclasses import dataclass -from typing import List, Dict, Optional, Tuple, Any -from datetime import datetime -from email.parser import BytesParser -from email.policy import SMTP as SMTPPolicy -from email.mime.text import MIMEText -from email.mime.multipart import MIMEMultipart -from email.utils import parseaddr, formatdate, make_msgid - -import boto3 -from botocore.exceptions import ClientError - -try: - from prometheus_client import start_http_server, Counter, Gauge, Histogram - PROMETHEUS_ENABLED = True -except ImportError: - PROMETHEUS_ENABLED = False - -# ============================================ -# CONFIGURATION -# ============================================ - -@dataclass -class Config: - aws_region: str = os.environ.get('AWS_REGION', 'us-east-2') - domains_list: str = os.environ.get('DOMAINS', '') - domains_file: str = os.environ.get('DOMAINS_FILE', '/etc/email-worker/domains.txt') - worker_threads: int = int(os.environ.get('WORKER_THREADS', '10')) - poll_interval: int = int(os.environ.get('POLL_INTERVAL', '20')) - max_messages: int = int(os.environ.get('MAX_MESSAGES', '10')) - visibility_timeout: int = int(os.environ.get('VISIBILITY_TIMEOUT', '300')) - smtp_host: str = os.environ.get('SMTP_HOST', 'localhost') - smtp_port: int = int(os.environ.get('SMTP_PORT', '25')) - smtp_use_tls: bool = os.environ.get('SMTP_USE_TLS', 'false').lower() == 'true' - smtp_user: str = os.environ.get('SMTP_USER', '') - smtp_pass: str = os.environ.get('SMTP_PASS', '') - smtp_pool_size: int = int(os.environ.get('SMTP_POOL_SIZE', '5')) - lmtp_enabled: bool = os.environ.get('LMTP_ENABLED', 'false').lower() == 'true' - lmtp_host: str = os.environ.get('LMTP_HOST', 'localhost') - lmtp_port: int = int(os.environ.get('LMTP_PORT', '24')) - rules_table: str = os.environ.get('DYNAMODB_RULES_TABLE', 'email-rules') - messages_table: str = os.environ.get('DYNAMODB_MESSAGES_TABLE', 'ses-outbound-messages') - blocked_table: str = os.environ.get('DYNAMODB_BLOCKED_TABLE', 'email-blocked-senders') - bounce_lookup_retries: int = int(os.environ.get('BOUNCE_LOOKUP_RETRIES', '3')) - bounce_lookup_delay: float = float(os.environ.get('BOUNCE_LOOKUP_DELAY', '1.0')) - metrics_port: int = int(os.environ.get('METRICS_PORT', '8000')) - health_port: int = int(os.environ.get('HEALTH_PORT', '8080')) - -config = Config() - -# ============================================ -# LOGGING -# ============================================ - -def log(message: str, level: str = 'INFO', worker_name: str = 'unified-worker'): - timestamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') - thread_name = threading.current_thread().name - print(f"[{timestamp}] [{level}] [{worker_name}] [{thread_name}] {message}", flush=True) - -# ============================================ -# METRICS -# ============================================ - -if PROMETHEUS_ENABLED: - emails_processed = Counter('emails_processed_total', 'Total emails processed', ['domain', 'status']) - emails_in_flight = Gauge('emails_in_flight', 'Emails currently being processed') - processing_time = Histogram('email_processing_seconds', 'Time to process email', ['domain']) - queue_size = Gauge('queue_messages_available', 'Messages in queue', ['domain']) - bounces_processed = Counter('bounces_processed_total', 'Bounce notifications processed', ['domain', 'type']) - autoreplies_sent = Counter('autoreplies_sent_total', 'Auto-replies sent', ['domain']) - forwards_sent = Counter('forwards_sent_total', 'Forwards sent', ['domain']) - blocked_senders = Counter('blocked_senders_total', 'Emails blocked by blacklist', ['domain']) - -# ============================================ -# AWS CLIENTS -# ============================================ - -sqs = boto3.client('sqs', region_name=config.aws_region) -s3 = boto3.client('s3', region_name=config.aws_region) -ses = boto3.client('ses', region_name=config.aws_region) -dynamodb = boto3.resource('dynamodb', region_name=config.aws_region) - -DYNAMODB_AVAILABLE = False -rules_table = None -messages_table = None -blocked_table = None - -try: - rules_table = dynamodb.Table(config.rules_table) - messages_table = dynamodb.Table(config.messages_table) - blocked_table = dynamodb.Table(config.blocked_table) - rules_table.table_status - messages_table.table_status - blocked_table.table_status - DYNAMODB_AVAILABLE = True -except Exception as e: - log(f"⚠ DynamoDB not fully available: {e}", 'WARNING') - -# ============================================ -# SMTP POOL -# ============================================ - -class SMTPPool: - def __init__(self, host: str, port: int, pool_size: int = 5): - self.host = host - self.port = port - self.pool_size = pool_size - self._pool: Queue = Queue(maxsize=pool_size) - self._initialized = False - - def _create_connection(self) -> Optional[smtplib.SMTP]: - try: - conn = smtplib.SMTP(self.host, self.port, timeout=30) - conn.ehlo() - if config.smtp_use_tls: - conn.starttls() - conn.ehlo() - if config.smtp_user and config.smtp_pass: - conn.login(config.smtp_user, config.smtp_pass) - return conn - except Exception as e: - log(f"Failed to create SMTP connection: {e}", 'ERROR') - return None - - def _test_connection(self, conn: smtplib.SMTP) -> bool: - try: - return conn.noop()[0] == 250 - except: - return False - - def initialize(self): - if self._initialized: - return - for _ in range(min(2, self.pool_size)): - conn = self._create_connection() - if conn: - self._pool.put(conn) - self._initialized = True - - def get_connection(self) -> Optional[smtplib.SMTP]: - try: - conn = self._pool.get(block=False) - if self._test_connection(conn): - return conn - else: - try: - conn.quit() - except: - pass - return self._create_connection() - except Empty: - return self._create_connection() - - def return_connection(self, conn: smtplib.SMTP): - if conn and self._test_connection(conn): - try: - self._pool.put_nowait(conn) - except: - try: - conn.quit() - except: - pass - else: - try: - conn.quit() - except: - pass - - def close_all(self): - while not self._pool.empty(): - try: - self._pool.get_nowait().quit() - except: - pass - -smtp_pool = SMTPPool(config.smtp_host, config.smtp_port, config.smtp_pool_size) - -# ============================================ -# HELPER -# ============================================ - -def domain_to_queue_name(domain: str) -> str: - return domain.replace('.', '-') + '-queue' - -def domain_to_bucket_name(domain: str) -> str: - return domain.replace('.', '-') + '-emails' - -def is_internal_address(email_address: str) -> bool: - if '@' not in email_address: - return False - domain = email_address.split('@')[1].lower() - return domain in MANAGED_DOMAINS - -def send_internal_email(from_addr: str, to_addr: str, raw_message: bytes, worker_name: str) -> bool: - try: - with smtplib.SMTP(config.smtp_host, 2525, timeout=30) as conn: - conn.ehlo() - conn.sendmail(from_addr, [to_addr], raw_message) - log(f" ✓ Internal delivery to {to_addr} (Port 2525)", 'SUCCESS', worker_name) - return True - except Exception as e: - log(f" ✗ Internal delivery failed to {to_addr}: {e}", 'ERROR', worker_name) - return False - -def get_queue_url(domain: str) -> Optional[str]: - queue_name = domain_to_queue_name(domain) - try: - return sqs.get_queue_url(QueueName=queue_name)['QueueUrl'] - except ClientError as e: - if e.response['Error']['Code'] == 'AWS.SimpleQueueService.NonExistentQueue': - log(f"Queue not found for domain: {domain}", 'WARNING') - else: - log(f"Error getting queue URL for {domain}: {e}", 'ERROR') - return None - -def load_domains() -> List[str]: - global MANAGED_DOMAINS - domains = [] - if config.domains_list: - domains.extend([d.strip() for d in config.domains_list.split(',') if d.strip()]) - if os.path.exists(config.domains_file): - with open(config.domains_file, 'r') as f: - for line in f: - domain = line.strip() - if domain and not domain.startswith('#'): - domains.append(domain) - domains = list(set(domains)) - MANAGED_DOMAINS = set(d.lower() for d in domains) - log(f"Loaded {len(domains)} domains: {', '.join(domains)}") - return domains - -MANAGED_DOMAINS: set = set() - -# ============================================ -# BLOCKLIST -# ============================================ - -def is_sender_blocked(recipient: str, sender: str, worker_name: str) -> bool: - if not DYNAMODB_AVAILABLE or not blocked_table: - return False - try: - response = blocked_table.get_item(Key={'email_address': recipient}) - item = response.get('Item', {}) - if not item: - return False - patterns = item.get('blocked_patterns', []) - sender_clean = parseaddr(sender)[1].lower() - for pattern in patterns: - if fnmatch.fnmatch(sender_clean, pattern.lower()): - log(f"⛔ BLOCKED: Sender {sender_clean} matches pattern '{pattern}' for inbox {recipient}", 'WARNING', worker_name) - return True - return False - except Exception as e: - log(f"⚠ Error checking block list for {recipient}: {e}", 'ERROR', worker_name) - return False - -# ============================================ -# BOUNCE HANDLING -# ============================================ - -def is_ses_bounce_notification(parsed_email) -> bool: - from_header = (parsed_email.get('From') or '').lower() - return 'mailer-daemon@' in from_header and 'amazonses.com' in from_header - -def get_bounce_info_from_dynamodb(message_id: str, worker_name: str = 'unified') -> Optional[Dict]: - if not DYNAMODB_AVAILABLE or not messages_table: - return None - for attempt in range(config.bounce_lookup_retries): - try: - response = messages_table.get_item(Key={'MessageId': message_id}) - item = response.get('Item') - if item: - return { - 'original_source': item.get('original_source', ''), - 'bounceType': item.get('bounceType', 'Unknown'), - 'bounceSubType': item.get('bounceSubType', 'Unknown'), - 'bouncedRecipients': item.get('bouncedRecipients', []), - 'timestamp': item.get('timestamp', '') - } - if attempt < config.bounce_lookup_retries - 1: - log(f" Bounce record not found yet, retrying in {config.bounce_lookup_delay}s (attempt {attempt + 1}/{config.bounce_lookup_retries})...", 'INFO', worker_name) - time.sleep(config.bounce_lookup_delay) - except Exception as e: - log(f"⚠ DynamoDB Error (attempt {attempt + 1}/{config.bounce_lookup_retries}): {e}", 'ERROR', worker_name) - if attempt < config.bounce_lookup_retries - 1: - time.sleep(config.bounce_lookup_delay) - return None - -def apply_bounce_logic(parsed, subject: str, worker_name: str = 'unified') -> Tuple[Any, bool]: - if not is_ses_bounce_notification(parsed): - return parsed, False - - log("🔍 Detected SES MAILER-DAEMON bounce notification", 'INFO', worker_name) - message_id = (parsed.get('Message-ID') or '').strip('<>').split('@')[0] - if not message_id: - return parsed, False - - bounce_info = get_bounce_info_from_dynamodb(message_id, worker_name) - if not bounce_info: - return parsed, False - - original_source = bounce_info['original_source'] - bounced_recipients = bounce_info['bouncedRecipients'] - bounce_type = bounce_info['bounceType'] - bounce_subtype = bounce_info['bounceSubType'] - - log(f"✓ Found bounce info:", 'INFO', worker_name) - log(f" Original sender: {original_source}", 'INFO', worker_name) - log(f" Bounce type: {bounce_type}/{bounce_subtype}", 'INFO', worker_name) - log(f" Bounced recipients: {bounced_recipients}", 'INFO', worker_name) - - if bounced_recipients: - new_from = bounced_recipients[0] - parsed['X-Original-SES-From'] = parsed.get('From', '') - parsed['X-Bounce-Type'] = f"{bounce_type}/{bounce_subtype}" - parsed.replace_header('From', new_from) - if not parsed.get('Reply-To'): - parsed['Reply-To'] = new_from - if 'delivery status notification' in subject.lower() or 'thanks for your submission' in subject.lower(): - parsed.replace_header('Subject', f"Delivery Status: {new_from}") - log(f"✓ Rewritten FROM: {new_from}", 'SUCCESS', worker_name) - return parsed, True - return parsed, False - -# ============================================ -# EMAIL BODY EXTRACTION -# ============================================ - -def extract_body_parts(parsed) -> Tuple[str, Optional[str]]: - text_body = '' - html_body = None - if parsed.is_multipart(): - for part in parsed.walk(): - content_type = part.get_content_type() - if content_type == 'text/plain': - try: - text_body += part.get_payload(decode=True).decode('utf-8', errors='ignore') - except: - pass - elif content_type == 'text/html': - try: - html_body = part.get_payload(decode=True).decode('utf-8', errors='ignore') - except: - pass - else: - try: - payload = parsed.get_payload(decode=True) - if payload: - decoded = payload.decode('utf-8', errors='ignore') - if parsed.get_content_type() == 'text/html': - html_body = decoded - else: - text_body = decoded - except: - text_body = str(parsed.get_payload()) - return text_body.strip() if text_body else '(No body content)', html_body - -# ============================================ -# AUTO-REPLY & FORWARDING -# ============================================ - -def create_ooo_reply(original_parsed, recipient: str, ooo_msg: str, content_type: str = 'text'): - text_body, html_body = extract_body_parts(original_parsed) - original_subject = original_parsed.get('Subject', '(no subject)') - original_from = original_parsed.get('From', 'unknown') - - msg = MIMEMultipart('mixed') - msg['From'] = recipient - msg['To'] = original_from - msg['Subject'] = f"Out of Office: {original_subject}" - msg['Date'] = formatdate(localtime=True) - msg['Message-ID'] = make_msgid(domain=recipient.split('@')[1]) - msg['In-Reply-To'] = original_parsed.get('Message-ID', '') - msg['References'] = original_parsed.get('Message-ID', '') - msg['Auto-Submitted'] = 'auto-replied' - msg['X-SES-Worker-Processed'] = 'ooo-reply' - - body_part = MIMEMultipart('alternative') - text_content = f"{ooo_msg}\n\n--- Original Message ---\nFrom: {original_from}\nSubject: {original_subject}\n\n{text_body}" - body_part.attach(MIMEText(text_content, 'plain', 'utf-8')) - - if content_type == 'html' or html_body: - html_content = f"
{ooo_msg}



Original Message
From: {original_from}
Subject: {original_subject}

" - html_content += (html_body if html_body else text_body.replace('\n', '
')) - body_part.attach(MIMEText(html_content, 'html', 'utf-8')) - - msg.attach(body_part) - return msg - -def create_forward_message(original_parsed, recipient: str, forward_to: str, original_from: str): - original_subject = original_parsed.get('Subject', '(no subject)') - original_date = original_parsed.get('Date', 'unknown') - - msg = MIMEMultipart('mixed') - msg['From'] = recipient - msg['To'] = forward_to - msg['Subject'] = f"FWD: {original_subject}" - msg['Date'] = formatdate(localtime=True) - msg['Message-ID'] = make_msgid(domain=recipient.split('@')[1]) - msg['Reply-To'] = original_from - msg['X-SES-Worker-Processed'] = 'forwarded' - - text_body, html_body = extract_body_parts(original_parsed) - body_part = MIMEMultipart('alternative') - fwd_text = f"---------- Forwarded message ---------\nFrom: {original_from}\nDate: {original_date}\nSubject: {original_subject}\nTo: {recipient}\n\n{text_body}" - body_part.attach(MIMEText(fwd_text, 'plain', 'utf-8')) - - if html_body: - fwd_html = f"
---------- Forwarded message ---------
From: {original_from}
Subject: {original_subject}

{html_body}
" - body_part.attach(MIMEText(fwd_html, 'html', 'utf-8')) - - msg.attach(body_part) - - if original_parsed.is_multipart(): - for part in original_parsed.walk(): - if part.get_content_maintype() == 'multipart': - continue - if part.get_content_type() in ['text/plain', 'text/html']: - continue - msg.attach(part) - - return msg - -def process_rules_for_recipient(recipient: str, parsed, domain: str, worker_name: str) -> None: - if not DYNAMODB_AVAILABLE or not rules_table: - return - try: - response = rules_table.get_item(Key={'email_address': recipient}) - rule = response.get('Item', {}) - if not rule: - return - - original_from = parsed.get('From', '') - sender_name, sender_addr = parseaddr(original_from) - if not sender_addr: - sender_addr = original_from - - # OOO - if rule.get('ooo_active', False): - auto_submitted = parsed.get('Auto-Submitted', '') - precedence = (parsed.get('Precedence') or '').lower() - if auto_submitted and auto_submitted != 'no': - pass - elif precedence in ['bulk', 'junk', 'list']: - pass - elif any(x in sender_addr.lower() for x in ['noreply', 'no-reply', 'mailer-daemon']): - pass - else: - try: - ooo_msg = rule.get('ooo_message', 'I am out of office.') - content_type = rule.get('ooo_content_type', 'text') - ooo_reply = create_ooo_reply(parsed, recipient, ooo_msg, content_type) - ooo_bytes = ooo_reply.as_bytes() - - if is_internal_address(sender_addr): - success = send_internal_email(recipient, sender_addr, ooo_bytes, worker_name) - if success: - log(f"✓ Sent OOO reply internally to {sender_addr}", 'SUCCESS', worker_name) - else: - ses.send_raw_email(Source=recipient, Destinations=[sender_addr], RawMessage={'Data': ooo_bytes}) - log(f"✓ Sent OOO reply externally to {sender_addr} via SES", 'SUCCESS', worker_name) - - if PROMETHEUS_ENABLED: - autoreplies_sent.labels(domain=domain).inc() - except Exception as e: - log(f"⚠ OOO reply failed to {sender_addr}: {e}", 'ERROR', worker_name) - - # Forwarding - for forward_to in rule.get('forwards', []): - try: - fwd_msg = create_forward_message(parsed, recipient, forward_to, original_from) - fwd_bytes = fwd_msg.as_bytes() - - if is_internal_address(forward_to): - success = send_internal_email(recipient, forward_to, fwd_bytes, worker_name) - if success: - log(f"✓ Forwarded internally to {forward_to}", 'SUCCESS', worker_name) - else: - ses.send_raw_email(Source=recipient, Destinations=[forward_to], RawMessage={'Data': fwd_bytes}) - log(f"✓ Forwarded externally to {forward_to} via SES", 'SUCCESS', worker_name) - - if PROMETHEUS_ENABLED: - forwards_sent.labels(domain=domain).inc() - except Exception as e: - log(f"⚠ Forward failed to {forward_to}: {e}", 'ERROR', worker_name) - - except Exception as e: - log(f"⚠ Rule processing error for {recipient}: {e}", 'WARNING', worker_name) - -# ============================================ -# SMTP SENDING -# ============================================ - -def is_permanent_recipient_error(error_msg: str) -> bool: - permanent_indicators = [ - '550', '551', '553', 'mailbox not found', 'user unknown', 'no such user', - 'recipient rejected', 'does not exist', 'invalid recipient', 'unknown user' - ] - return any(indicator in error_msg.lower() for indicator in permanent_indicators) - -def send_email_to_recipient(from_addr: str, recipient: str, raw_message: bytes, worker_name: str, max_retries: int = 2) -> Tuple[bool, Optional[str], bool]: - use_lmtp = config.lmtp_enabled - last_error = None - - for attempt in range(max_retries + 1): - conn = None - try: - if use_lmtp: - conn = smtplib.LMTP(config.lmtp_host, config.lmtp_port, timeout=30) - conn.ehlo() - else: - conn = smtp_pool.get_connection() - if not conn: - last_error = "No SMTP connection available" - time.sleep(0.5) - continue - - result = conn.sendmail(from_addr, [recipient], raw_message) - - if use_lmtp: - conn.quit() - else: - smtp_pool.return_connection(conn) - - if isinstance(result, dict) and result: - error = str(result.get(recipient, 'Unknown refusal')) - is_perm = is_permanent_recipient_error(error) - log(f" ✗ {recipient}: {error} ({'permanent' if is_perm else 'temporary'})", 'ERROR', worker_name) - return False, error, is_perm - else: - delivery_method = "LMTP" if use_lmtp else "SMTP" - log(f" ✓ {recipient}: Delivered ({delivery_method})", 'SUCCESS', worker_name) - return True, None, False - - except smtplib.SMTPServerDisconnected as e: - last_error = str(e) - log(f" ⚠ {recipient}: Connection lost, retrying... (attempt {attempt + 1}/{max_retries + 1})", 'WARNING', worker_name) - if conn: - try: - conn.quit() - except: - pass - time.sleep(0.3) - continue - - except smtplib.SMTPRecipientsRefused as e: - if conn and not use_lmtp: - smtp_pool.return_connection(conn) - elif conn: - try: - conn.quit() - except: - pass - error_msg = str(e) - is_perm = is_permanent_recipient_error(error_msg) - log(f" ✗ {recipient}: Recipients refused - {error_msg}", 'ERROR', worker_name) - return False, error_msg, is_perm - - except smtplib.SMTPException as e: - error_msg = str(e) - if 'disconnect' in error_msg.lower() or 'closed' in error_msg.lower() or 'connection' in error_msg.lower(): - log(f" ⚠ {recipient}: Connection error, retrying... (attempt {attempt + 1}/{max_retries + 1})", 'WARNING', worker_name) - last_error = error_msg - if conn: - try: - conn.quit() - except: - pass - time.sleep(0.3) - continue - - if conn and not use_lmtp: - smtp_pool.return_connection(conn) - elif conn: - try: - conn.quit() - except: - pass - is_perm = is_permanent_recipient_error(error_msg) - log(f" ✗ {recipient}: Error - {error_msg}", 'ERROR', worker_name) - return False, error_msg, is_perm - - except Exception as e: - if conn: - try: - conn.quit() - except: - pass - log(f" ✗ {recipient}: Unexpected error - {e}", 'ERROR', worker_name) - return False, str(e), False - - log(f" ✗ {recipient}: All retries failed - {last_error}", 'ERROR', worker_name) - return False, last_error or "Connection failed after retries", False - -# ============================================ -# S3 METADATA -# ============================================ - -def mark_as_processed(bucket: str, key: str, worker_name: str, invalid_inboxes: List[str] = None): - try: - head = s3.head_object(Bucket=bucket, Key=key) - metadata = head.get('Metadata', {}) or {} - metadata['processed'] = 'true' - metadata['processed_at'] = str(int(time.time())) - metadata['processed_by'] = worker_name - metadata['status'] = 'delivered' - metadata.pop('processing_started', None) - metadata.pop('queued_at', None) - if invalid_inboxes: - metadata['invalid_inboxes'] = ','.join(invalid_inboxes) - log(f"⚠ Invalid inboxes recorded: {', '.join(invalid_inboxes)}", 'WARNING', worker_name) - - s3.copy_object( - Bucket=bucket, - Key=key, - CopySource={'Bucket': bucket, 'Key': key}, - Metadata=metadata, - MetadataDirective='REPLACE' - ) - except Exception as e: - log(f"Failed to mark as processed: {e}", 'WARNING', worker_name) - -def mark_as_all_invalid(bucket: str, key: str, invalid_inboxes: List[str], worker_name: str): - try: - head = s3.head_object(Bucket=bucket, Key=key) - metadata = head.get('Metadata', {}) or {} - metadata['processed'] = 'true' - metadata['processed_at'] = str(int(time.time())) - metadata['processed_by'] = worker_name - metadata['status'] = 'failed' - metadata['error'] = 'All recipients are invalid (mailboxes do not exist)' - metadata['invalid_inboxes'] = ','.join(invalid_inboxes) - metadata.pop('processing_started', None) - metadata.pop('queued_at', None) - - s3.copy_object( - Bucket=bucket, - Key=key, - CopySource={'Bucket': bucket, 'Key': key}, - Metadata=metadata, - MetadataDirective='REPLACE' - ) - except Exception as e: - log(f"Failed to mark as all invalid: {e}", 'WARNING', worker_name) - -# ============================================ -# MAIN MESSAGE PROCESSING -# ============================================ - -def process_message(domain: str, message: dict, receive_count: int) -> bool: - worker_name = f"worker-{domain}" - try: - message_body = json.loads(message['Body']) - ses_msg = json.loads(message_body['Message']) if 'Message' in message_body else message_body - - mail = ses_msg.get('mail', {}) - receipt = ses_msg.get('receipt', {}) - message_id = mail.get('messageId') - - if message_id == "AMAZON_SES_SETUP_NOTIFICATION": - log("ℹ️ Received Amazon SES Setup Notification. Ignoring.", 'INFO', worker_name) - return True - - from_addr = mail.get('source') - recipients = receipt.get('recipients', []) - - if not message_id: - log("❌ Error: No messageId in event payload", 'ERROR', worker_name) - return True - - if recipients: - first_recipient = recipients[0] - recipient_domain = first_recipient.split('@')[1] - if recipient_domain.lower() != domain.lower(): - log(f"⚠ Security: Ignored message for {recipient_domain} (I am worker for {domain})", 'WARNING', worker_name) - return True - else: - log("⚠ Warning: No recipients in event", 'WARNING', worker_name) - return True - - bucket = domain_to_bucket_name(domain) - key = message_id - - recipients_str = recipients[0] if len(recipients) == 1 else f"{len(recipients)} recipients" - log(f"📧 Processing: {key[:20]}... -> {recipients_str}", 'INFO', worker_name) - - # Download S3 - try: - response = s3.get_object(Bucket=bucket, Key=key) - raw_bytes = response['Body'].read() - - temp_parsed = BytesParser(policy=SMTPPolicy).parsebytes(raw_bytes) - x_worker_processed = temp_parsed.get('X-SES-Worker-Processed', '') - auto_submitted = temp_parsed.get('Auto-Submitted', '') - - is_processed_by_us = bool(x_worker_processed) - is_our_auto_reply = auto_submitted == 'auto-replied' and x_worker_processed - - if is_processed_by_us: - log(f"🔄 Loop prevention: Already processed by worker", 'INFO', worker_name) - skip_rules = True - else: - skip_rules = False - - except s3.exceptions.NoSuchKey: - if receive_count < 5: - log(f"⏳ S3 Object not found yet (Attempt {receive_count}). Retrying...", 'WARNING', worker_name) - return False - else: - log(f"❌ S3 Object missing permanently after retries.", 'ERROR', worker_name) - return True - except ClientError as e: - if e.response['Error']['Code'] == 'NoSuchKey': - if receive_count < 5: - log(f"⏳ S3 Object not found yet (Attempt {receive_count}). Retrying...", 'WARNING', worker_name) - return False - else: - log(f"❌ S3 Object missing permanently after retries.", 'ERROR', worker_name) - return True - log(f"❌ S3 Download Error: {e}", 'ERROR', worker_name) - return False - except Exception as e: - log(f"❌ S3 Download Error: {e}", 'ERROR', worker_name) - return False - - # Parse & Bounce Logic - try: - parsed = BytesParser(policy=SMTPPolicy).parsebytes(raw_bytes) - subject = parsed.get('Subject', '(no subject)') - - is_bounce = is_ses_bounce_notification(parsed) - parsed, modified = apply_bounce_logic(parsed, subject, worker_name) - - if modified: - raw_bytes = parsed.as_bytes() - from_addr_final = parsed.get('From') - if PROMETHEUS_ENABLED: - bounces_processed.labels(domain=domain, type='rewritten').inc() - else: - from_addr_final = from_addr - except Exception as e: - log(f"⚠ Parsing/Logic Error: {e}. Sending original.", 'WARNING', worker_name) - from_addr_final = from_addr - is_bounce = False - skip_rules = False - - # Process recipients - successful = [] - failed_permanent = [] - failed_temporary = [] - blocked_recipients = [] - - for recipient in recipients: - if is_sender_blocked(recipient, from_addr_final, worker_name): - log(f"🗑 Silently dropping message for {recipient} (Sender blocked)", 'INFO', worker_name) - blocked_recipients.append(recipient) - if PROMETHEUS_ENABLED: - blocked_senders.labels(domain=domain).inc() - continue - - if not is_bounce and not skip_rules: - process_rules_for_recipient(recipient, parsed, domain, worker_name) - - success, error, is_perm = send_email_to_recipient(from_addr_final, recipient, raw_bytes, worker_name) - - if success: - successful.append(recipient) - if PROMETHEUS_ENABLED: - emails_processed.labels(domain=domain, status='success').inc() - elif is_perm: - failed_permanent.append(recipient) - if PROMETHEUS_ENABLED: - emails_processed.labels(domain=domain, status='permanent_failure').inc() - else: - failed_temporary.append(recipient) - if PROMETHEUS_ENABLED: - emails_processed.labels(domain=domain, status='temporary_failure').inc() - - # Final result - total_handled = len(successful) + len(failed_permanent) + len(blocked_recipients) - - if total_handled == len(recipients): - if len(blocked_recipients) == len(recipients): - log(f"⛔ All recipients blocked sender. Deleting S3 object {key}...", 'WARNING', worker_name) - try: - s3.delete_object(Bucket=bucket, Key=key) - log(f"🗑 Deleted S3 object {key}", 'SUCCESS', worker_name) - except Exception as e: - log(f"⚠ Failed to delete S3 object: {e}", 'ERROR', worker_name) - elif len(successful) > 0: - mark_as_processed(bucket, key, worker_name, failed_permanent if failed_permanent else None) - elif len(failed_permanent) > 0: - mark_as_all_invalid(bucket, key, failed_permanent, worker_name) - - result_parts = [] - if successful: - result_parts.append(f"{len(successful)} OK") - if failed_permanent: - result_parts.append(f"{len(failed_permanent)} invalid") - if blocked_recipients: - result_parts.append(f"{len(blocked_recipients)} blocked") - - log(f"✅ Completed ({', '.join(result_parts)})", 'SUCCESS', worker_name) - return True - else: - log(f"🔄 Temp failure ({len(failed_temporary)} failed), will retry", 'WARNING', worker_name) - return False - - except Exception as e: - log(f"❌ CRITICAL WORKER ERROR: {e}", 'ERROR', worker_name) - traceback.print_exc() - return False - -# ============================================ -# DOMAIN POLLER -# ============================================ - -def poll_domain(domain: str, queue_url: str, stop_event: threading.Event, domain_stats: Dict[str, int], stats_lock: threading.Lock): - worker_name = f"worker-{domain}" - log(f"🚀 Starting poller for {domain}", 'INFO', worker_name) - messages_processed = 0 - - while not stop_event.is_set(): - try: - response = sqs.receive_message( - QueueUrl=queue_url, - MaxNumberOfMessages=config.max_messages, - WaitTimeSeconds=config.poll_interval, - VisibilityTimeout=config.visibility_timeout, - AttributeNames=['ApproximateReceiveCount', 'SentTimestamp'] - ) - - messages = response.get('Messages', []) - - if PROMETHEUS_ENABLED: - try: - attrs = sqs.get_queue_attributes(QueueUrl=queue_url, AttributeNames=['ApproximateNumberOfMessages']) - queue_size.labels(domain=domain).set(int(attrs['Attributes'].get('ApproximateNumberOfMessages', 0))) - except: - pass - - if not messages: - continue - - log(f"✉ Received {len(messages)} message(s)", 'INFO', worker_name) - - for message in messages: - if stop_event.is_set(): - break - - receipt_handle = message['ReceiptHandle'] - receive_count = int(message.get('Attributes', {}).get('ApproximateReceiveCount', 1)) - - if PROMETHEUS_ENABLED: - emails_in_flight.inc() - start_time = time.time() - - try: - success = process_message(domain, message, receive_count) - - if success: - sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=receipt_handle) - messages_processed += 1 - with stats_lock: - domain_stats[domain] = messages_processed - else: - log(f"⚠ Retry queued (attempt {receive_count}/3)", 'WARNING', worker_name) - - except json.JSONDecodeError as e: - log(f"✗ Invalid message format: {e}", 'ERROR', worker_name) - sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=receipt_handle) - - except Exception as e: - log(f"✗ Error processing message: {e}", 'ERROR', worker_name) - traceback.print_exc() - - finally: - if PROMETHEUS_ENABLED: - emails_in_flight.dec() - processing_time.labels(domain=domain).observe(time.time() - start_time) - - except Exception as e: - log(f"✗ Error polling: {e}", 'ERROR', worker_name) - time.sleep(5) - - log(f"👋 Stopped (processed: {messages_processed})", 'INFO', worker_name) - -# ============================================ -# UNIFIED WORKER -# ============================================ - -class UnifiedWorker: - def __init__(self): - self.stop_event = threading.Event() - self.domains: List[str] = [] - self.queue_urls: Dict[str, str] = {} - self.poller_threads: List[threading.Thread] = [] - self.domain_stats: Dict[str, int] = {} - self.stats_lock = threading.Lock() - - def setup(self): - self.domains = load_domains() - if not self.domains: - log("❌ No domains configured!", 'ERROR') - sys.exit(1) - - for domain in self.domains: - url = get_queue_url(domain) - if url: - self.queue_urls[domain] = url - log(f" ✓ {domain} -> {domain_to_queue_name(domain)}") - else: - log(f" ✗ {domain} -> Queue not found!", 'WARNING') - - if not self.queue_urls: - log("❌ No valid queues found!", 'ERROR') - sys.exit(1) - - smtp_pool.initialize() - log(f"Initialized with {len(self.queue_urls)} domains") - - def start(self): - for domain in self.queue_urls.keys(): - self.domain_stats[domain] = 0 - - for domain, queue_url in self.queue_urls.items(): - thread = threading.Thread( - target=poll_domain, - args=(domain, queue_url, self.stop_event, self.domain_stats, self.stats_lock), - name=f"poller-{domain}", - daemon=True - ) - thread.start() - self.poller_threads.append(thread) - - log(f"Started {len(self.poller_threads)} domain pollers") - - last_status_log = time.time() - status_interval = 300 # 5 minutes - - try: - while not self.stop_event.is_set(): - self.stop_event.wait(timeout=10) - if time.time() - last_status_log > status_interval: - self._log_status_table() - last_status_log = time.time() - except KeyboardInterrupt: - pass - - def _log_status_table(self): - active_threads = sum(1 for t in self.poller_threads if t.is_alive()) - with self.stats_lock: - total_processed = sum(self.domain_stats.values()) - summary = " | ".join([f"{d.split('.')[0]}:{c}" for d, c in self.domain_stats.items() if c > 0]) - log(f"📊 Status: {active_threads}/{len(self.poller_threads)} active, total:{total_processed} | {summary}") - - def stop(self): - log("⚠ Stopping worker...") - self.stop_event.set() - for thread in self.poller_threads: - thread.join(timeout=10) - if thread.is_alive(): - log(f"Warning: {thread.name} did not stop gracefully", 'WARNING') - smtp_pool.close_all() - log("👋 Worker stopped") - -# ============================================ -# HEALTH SERVER -# ============================================ - -def start_health_server(worker: UnifiedWorker): - from http.server import HTTPServer, BaseHTTPRequestHandler - - class HealthHandler(BaseHTTPRequestHandler): - def do_GET(self): - if self.path == '/health' or self.path == '/': - self.send_response(200) - self.send_header('Content-Type', 'application/json') - self.end_headers() - status = { - 'status': 'healthy', - 'domains': len(worker.queue_urls), - 'domain_list': list(worker.queue_urls.keys()), - 'dynamodb': DYNAMODB_AVAILABLE, - 'features': { - 'bounce_rewriting': True, - 'auto_reply': DYNAMODB_AVAILABLE, - 'forwarding': DYNAMODB_AVAILABLE, - 'blocklist': DYNAMODB_AVAILABLE, - 'lmtp': config.lmtp_enabled - }, - 'timestamp': datetime.utcnow().isoformat() - } - self.wfile.write(json.dumps(status, indent=2).encode()) - elif self.path == '/domains': - self.send_response(200) - self.send_header('Content-Type', 'application/json') - self.end_headers() - self.wfile.write(json.dumps(list(worker.queue_urls.keys())).encode()) - else: - self.send_response(404) - self.end_headers() - - def log_message(self, format, *args): - pass # Suppress HTTP logs - - server = HTTPServer(('0.0.0.0', config.health_port), HealthHandler) - thread = threading.Thread(target=server.serve_forever, daemon=True, name='health-server') - thread.start() - log(f"Health server on port {config.health_port}") - -# ============================================ -# ENTRY POINT -# ============================================ - -def main(): - worker = UnifiedWorker() - - def signal_handler(signum, frame): - log(f"Received signal {signum}") - worker.stop() - sys.exit(0) - - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signalIGINT, signal_handler) - - worker.setup() - - if PROMETHEUS_ENABLED: - start_http_server(config.metrics_port) - log(f"Prometheus metrics on port {config.metrics_port}") - - start_health_server(worker) - - log(f"\n{'='*70}") - log(f"🚀 UNIFIED EMAIL WORKER (CLEANED)") - log(f"{'='*70}") - log(f" Domains: {len(worker.queue_urls)}") - log(f" DynamoDB: {'Connected' if DYNAMODB_AVAILABLE else 'Not Available'}") - if config.lmtp_enabled: - log(f" Delivery: LMTP -> {config.lmtp_host}:{config.lmtp_port} (bypasses transport_maps)") - else: - log(f" Delivery: SMTP -> {config.smtp_host}:{config.smtp_port}") - log(f" Poll Interval: {config.poll_interval}s") - log(f" Visibility: {config.visibility_timeout}s") - log(f"") - log(f" Features:") - log(f" ✓ Bounce Detection & Header Rewriting") - log(f" {'✓' if DYNAMODB_AVAILABLE else '✗'} Auto-Reply / Out-of-Office") - log(f" {'✓' if DYNAMODB_AVAILABLE else '✗'} Email Forwarding") - log(f" {'✓' if DYNAMODB_AVAILABLE else '✗'} Blocked Senders (Wildcard)") - log(f" {'✓' if PROMETHEUS_ENABLED else '✗'} Prometheus Metrics") - log(f" {'✓' if config.lmtp_enabled else '✗'} LMTP Direct Delivery") - log(f"") - log(f" Active Domains:") - for domain in sorted(worker.queue_urls.keys()): - log(f" • {domain}") - log(f"{'='*70}\n") - - worker.start() - -if __name__ == '__main__': - main() \ No newline at end of file