extract_original_message_id corrected

2025-12-11 17:27:11 -06:00 · 2025-12-11 17:27:11 -06:00 · cca66b7833
parent 76debb9f7c
commit cca66b7833
1 changed files with 78 additions and 16 deletions
--- a/worker_sns.py
+++ b/worker_sns.py
@@ -55,23 +55,85 @@ def is_ses_bounce_or_autoreply(parsed):
    return is_mailer_daemon or is_auto_replied

 def extract_original_message_id(parsed):
-    """Extrahiert Original-Message-ID aus Headern"""
-    in_reply_to = (parsed.get('In-Reply-To') or '').strip()
-    if in_reply_to:
-        msg_id = in_reply_to
-        if msg_id.startswith('<') and '>' in msg_id:
-            msg_id = msg_id[1:msg_id.find('>')]
-        if '@' in msg_id: msg_id = msg_id.split('@')[0]
-        return msg_id
+    """
+    Extrahiert Original SES Message-ID aus Email
+    SES Format: 010f[hex12]-[hex8]-[hex4]-[hex4]-[hex4]-[hex12]-000000
+    """
+    import re
+    
+    # SES Message-ID Pattern (endet immer mit -000000)
+    ses_pattern = re.compile(r'010f[0-9a-f]{12}-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}-000000')
+    
+    # 1. Versuche Standard-Header (In-Reply-To, References)
+    for header in ['In-Reply-To', 'References']:
+        value = (parsed.get(header) or '').strip()
+        if value:
+            match = ses_pattern.search(value)
+            if match:
+                log(f"  Found Message-ID in {header}: {match.group(0)}")
+                return match.group(0)
+    
+    # 2. Durchsuche Message-ID Header (manchmal steht dort die Original-ID)
+    msg_id_header = (parsed.get('Message-ID') or '').strip()
+    if msg_id_header:
+        match = ses_pattern.search(msg_id_header)
+        if match:
+            # Aber nur wenn es nicht die ID der aktuellen Bounce-Message ist
+            # (die beginnt oft auch mit 010f...)
+            pass  # Wir überspringen das erstmal
+    
+    # 3. Durchsuche den kompletten Email-Body (inkl. ALLE Attachments/Parts)
+    # Das fängt auch attached messages, text attachments, etc. ab
+    try:
+        body_text = ''
+        
+        # Hole den kompletten Body als String
+        if parsed.is_multipart():
+            for part in parsed.walk():
+                content_type = part.get_content_type()
+                
+                # Durchsuche ALLE Parts (außer Binärdaten wie images)
+                # Text-Parts, HTML, attached messages, und auch application/* Parts
+                if content_type.startswith('text/') or \
+                   content_type == 'message/rfc822' or \
+                   content_type.startswith('application/'):
+                    try:
+                        payload = part.get_payload(decode=True)
+                        if payload:
+                            # Versuche als UTF-8, fallback auf Latin-1
+                            try:
+                                body_text += payload.decode('utf-8', errors='ignore')
+                            except:
+                                try:
+                                    body_text += payload.decode('latin-1', errors='ignore')
+                                except:
+                                    # Last resort: str() with its default UTF-8 codec, ignoring errors
+                                    body_text += str(payload, errors='ignore')
+                    except:
+                        # Falls decode fehlschlägt, String-Payload holen
+                        payload = part.get_payload()
+                        if isinstance(payload, str):
+                            body_text += payload
+        else:
+            # Nicht-Multipart Message
+            payload = parsed.get_payload(decode=True)
+            if payload:
+                try:
+                    body_text = payload.decode('utf-8', errors='ignore')
+                except:
+                    body_text = payload.decode('latin-1', errors='ignore')
+        
+        # Suche alle SES Message-IDs im Body
+        matches = ses_pattern.findall(body_text)
+        if matches:
+            # Nehme die ERSTE gefundene ID (meist die Original-ID)
+            # Die letzte ist oft die Bounce-Message selbst
+            log(f"  Found {len(matches)} SES Message-ID(s) in body, using first: {matches[0]}")
+            return matches[0]
+    
+    except Exception as e:
+        log(f"  Warning: Could not search body for Message-ID: {e}", 'WARNING')
    
-    # Fallback References
-    refs = (parsed.get('References') or '').strip()
-    if refs:
-        first_ref = refs.split()[0]
-        if first_ref.startswith('<') and '>' in first_ref:
-            first_ref = first_ref[1:first_ref.find('>')]
-        if '@' in first_ref: first_ref = first_ref.split('@')[0]
-        return first_ref
    return None

 def apply_bounce_logic(parsed, subject):