extract_original_message_id corrected

2025-12-11 17:27:11 -06:00 · 2025-12-11 17:27:11 -06:00 · cca66b7833
parent 76debb9f7c
commit cca66b7833
1 changed files with 78 additions and 16 deletions
--- a/worker_sns.py
+++ b/worker_sns.py
@ -55,23 +55,85 @@ def is_ses_bounce_or_autoreply(parsed):
    return is_mailer_daemon or is_auto_replied
 def extract_original_message_id(parsed):
     """
     Extract the SES Message-ID of the original mail from a bounce/auto-reply.

     SES format: 010f[hex12]-[hex8]-[hex4]-[hex4]-[hex4]-[hex12]-000000

     Lookup order:
       1. The ``In-Reply-To`` and ``References`` headers.
       2. The decoded content of the message: text/* and application/*
          parts, plus the headers of messages attached as message/rfc822.

     The ``Message-ID`` header of ``parsed`` itself is deliberately not
     consulted: it may match the SES pattern as well, but it identifies
     the bounce message, not the original mail.

     Args:
         parsed: A parsed e-mail (``email.message.Message`` compatible).

     Returns:
         The first SES Message-ID found (str), or ``None`` when the message
         does not contain one.
     """
     import re

     # SES Message-ID pattern (always ends with -000000).
     ses_pattern = re.compile(r'010f[0-9a-f]{12}-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}-000000')

     # 1. Standard threading headers.
     for header in ('In-Reply-To', 'References'):
         # str() guards against header objects that are not plain strings.
         value = str(parsed.get(header) or '').strip()
         match = ses_pattern.search(value)
         if match:
             log(f"  Found Message-ID in {header}: {match.group(0)}")
             return match.group(0)

     # 2. Full message content, including attachments. Best effort: a
     #    malformed part must not abort bounce processing.
     try:
         body_text = _collect_searchable_text(parsed)
         matches = ses_pattern.findall(body_text)
         if matches:
             # Heuristic: the first ID is usually the original message; later
             # ones often belong to the bounce message itself.
             log(f"  Found {len(matches)} SES Message-ID(s) in body, using first: {matches[0]}")
             return matches[0]
     except Exception as e:
         log(f"  Warning: Could not search body for Message-ID: {e}", 'WARNING')

     return None


 def _collect_searchable_text(parsed):
     """Return the decoded text of every searchable part, joined by newlines."""
     if not parsed.is_multipart():
         payload = parsed.get_payload(decode=True)
         return payload.decode('utf-8', errors='ignore') if payload else ''

     chunks = []
     for part in parsed.walk():
         content_type = part.get_content_type()

         if content_type == 'message/rfc822':
             # A container part has no decodable payload of its own. walk()
             # visits the attached message's body parts, but not its headers,
             # which is where the original Message-ID lives.
             attached = part.get_payload()
             if isinstance(attached, str):
                 chunks.append(attached)
             elif isinstance(attached, list):
                 for message in attached:
                     if hasattr(message, 'values'):
                         chunks.extend(str(value) for value in message.values())
             continue

         # Skip binary data such as images; keep text, HTML and application/*.
         if not content_type.startswith(('text/', 'application/')):
             continue

         try:
             payload = part.get_payload(decode=True)
         except Exception:
             # Transfer decoding failed; fall back to the raw payload.
             payload = part.get_payload()

         if isinstance(payload, bytes):
             # IDs are plain ASCII, so dropping undecodable bytes is harmless.
             chunks.append(payload.decode('utf-8', errors='ignore'))
         elif isinstance(payload, str):
             chunks.append(payload)

     # A separator keeps the pattern from matching across part boundaries.
     return '\n'.join(chunks)
 def apply_bounce_logic(parsed, subject):