extract_original_message_id corrected

This commit is contained in:
Andreas Knuth 2025-12-11 17:27:11 -06:00
parent 76debb9f7c
commit cca66b7833
1 changed file with 78 additions and 16 deletions

View File

@@ -55,23 +55,85 @@ def is_ses_bounce_or_autoreply(parsed):
return is_mailer_daemon or is_auto_replied
def _decode_part_text(part):
    """Return the transfer-decoded payload of *part* as text ('' if it has none)."""
    try:
        payload = part.get_payload(decode=True)
    except Exception:
        # Decoding failed: fall back to the raw payload when it is already text.
        raw = part.get_payload()
        return raw if isinstance(raw, str) else ''
    if not payload:
        # Container parts and empty parts yield None / b''.
        return ''
    # errors='ignore' never raises, so no further fallback is needed; the IDs
    # searched for are plain ASCII and survive dropping undecodable bytes.
    return payload.decode('utf-8', errors='ignore')


def _collect_searchable_text(parsed):
    """Join the text of every part of *parsed* that may contain a message ID."""
    if not parsed.is_multipart():
        return _decode_part_text(parsed)

    chunks = []
    for part in parsed.walk():
        content_type = part.get_content_type()
        if part.is_multipart():
            # Containers have no decodable payload of their own. For an attached
            # message, look at its Message-ID header: walk() only visits the
            # attached message's body parts afterwards, never its headers.
            if content_type == 'message/rfc822':
                for attached in part.get_payload():
                    chunks.append(str(attached.get('Message-ID') or ''))
            continue
        # Search text, HTML, attached messages and application/* parts;
        # other binary data such as images is skipped.
        if content_type.startswith(('text/', 'application/')) \
                or content_type == 'message/rfc822':
            chunks.append(_decode_part_text(part))
    # A newline separator keeps a match from spanning two unrelated parts.
    return '\n'.join(chunks)


def extract_original_message_id(parsed):
    """
    Extract the original SES message ID from an email.

    Matched format:
    010f[hex12]-[hex8]-[hex4]-[hex4]-[hex4]-[hex12]-000000

    Lookup order:
      1. The In-Reply-To and References headers.
      2. The message body, including all text/*, application/* and attached
         message parts; the first ID found wins.

    The message's own Message-ID header is deliberately not consulted: it
    identifies this message, not the original one.

    Args:
        parsed: Parsed email message offering ``get``, ``is_multipart``,
            ``walk`` and ``get_payload`` (e.g. ``email.message.Message``).

    Returns:
        The matched ID string, or None when no ID is found or the body
        cannot be searched.
    """
    import re

    # The pattern requires the trailing "-000000" suffix.
    ses_pattern = re.compile(r'010f[0-9a-f]{12}-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}-000000')

    # 1. Standard reply headers.
    for header in ('In-Reply-To', 'References'):
        # str() guards against header objects that are not plain strings.
        value = str(parsed.get(header) or '').strip()
        match = ses_pattern.search(value)
        if match:
            log(f" Found Message-ID in {header}: {match.group(0)}")
            return match.group(0)

    # 2. Whole body, including attachments. This is best effort: a failure
    #    here is logged as a warning and treated as "not found".
    try:
        matches = ses_pattern.findall(_collect_searchable_text(parsed))
    except Exception as e:
        log(f" Warning: Could not search body for Message-ID: {e}", 'WARNING')
        return None

    if matches:
        # Take the FIRST ID found (usually the original one); the last one is
        # often the bounce message itself.
        log(f" Found {len(matches)} SES Message-ID(s) in body, using first: {matches[0]}")
        return matches[0]

    return None
def apply_bounce_logic(parsed, subject):