diff --git a/email-worker/worker.py b/email-worker/worker.py
index eb913ac..afa5885 100644
--- a/email-worker/worker.py
+++ b/email-worker/worker.py
@@ -14,7 +14,9 @@ from aws import S3Handler, SQSHandler, SESHandler, DynamoDBHandler
 from email_processing import EmailParser, BounceHandler, RulesProcessor, BlocklistChecker
 from smtp.delivery import EmailDelivery
 from metrics.prometheus import MetricsCollector
+from email.parser import BytesParser
+from email.policy import compat32
 
 
 class MessageProcessor:
     """Processes individual email messages"""
@@ -126,6 +128,35 @@ class MessageProcessor:
 
         # 5. PARSING & BOUNCE LOGIC
         try:
+            # --- FIX 2.0: Pre-sanitize via legacy (compat32) mode ---
+            # The strict parser fails as soon as a malformed header is
+            # accessed, so parse leniently first, repair the header and
+            # regenerate the raw bytes before the real parse below.
+            try:
+                # 1. Parse with the compat32 policy (tolerates syntax errors).
+                lenient_parser = BytesParser(policy=compat32)
+                temp_msg = lenient_parser.parsebytes(raw_bytes)
+
+                # 2. Check for and repair a Message-ID containing brackets.
+                bad_msg_id = temp_msg.get('Message-ID', '')
+                if bad_msg_id and ('[' in bad_msg_id or ']' in bad_msg_id):
+                    clean_id = bad_msg_id.replace('[', '').replace(']', '')
+                    temp_msg.replace_header('Message-ID', clean_id)
+
+                    # 3. Re-serialize only after an actual repair: clean_id
+                    #    exists only on this path, and untouched messages
+                    #    keep their original bytes.
+                    raw_bytes = temp_msg.as_bytes()
+                    log(f" 🔧 Sanitized malformed Message-ID via Legacy Mode: {clean_id}", 'INFO', worker_name)
+
+                    if self.metrics:
+                        self.metrics.increment_bounce(domain, 'sanitized_header')
+
+            except Exception as e_sanitize:
+                # Best effort: a sanitization failure must not abort processing.
+                log(f" ⚠ Sanitization warning: {e_sanitize}", 'WARNING', worker_name)
+            # ---------------------------------------------
+
             parsed = self.parser.parse_bytes(raw_bytes)
 
             # --- FIX START: Sanitize Malformed Headers ---