# email-amazon/unified-worker/email-worker/email/parser.py
#
# 81 lines
# 2.6 KiB
# Python

#!/usr/bin/env python3
"""
Email parsing utilities
"""
from typing import Tuple, Optional
from email.parser import BytesParser
from email.policy import SMTP as SMTPPolicy
class EmailParser:
    """Stateless helpers for parsing raw emails and inspecting the result."""

    @staticmethod
    def parse_bytes(raw_bytes: bytes):
        """
        Parse raw email bytes into an email message object

        Args:
            raw_bytes: Complete raw message (headers and body) as bytes

        Returns:
            Message object produced by BytesParser using the SMTP policy
        """
        return BytesParser(policy=SMTPPolicy).parsebytes(raw_bytes)

    @staticmethod
    def _decode_part(part) -> Optional[str]:
        """
        Decode the payload of a single (non-container) message part to text

        Args:
            part: Message or sub-part whose payload should be decoded

        Returns:
            Decoded text, or None when the part has no payload
        """
        payload = part.get_payload(decode=True)
        if not payload:
            return None
        # Honour the charset declared on the part; UTF-8 is only the fallback
        # for parts that do not declare one.
        charset = part.get_content_charset() or 'utf-8'
        try:
            return payload.decode(charset, errors='ignore')
        except (LookupError, ValueError):
            # Unknown or malformed charset label in the Content-Type header
            return payload.decode('utf-8', errors='ignore')

    @staticmethod
    def extract_body_parts(parsed) -> Tuple[str, Optional[str]]:
        """
        Extract both text/plain and text/html body parts

        All inline text/plain parts are concatenated in document order; for
        text/html the last inline part wins. Parts explicitly marked as
        attachments are not treated as body content.

        Args:
            parsed: Parsed email message object

        Returns:
            Tuple of (text_body, html_body or None). text_body is stripped and
            falls back to '(No body content)' when nothing usable was found.
        """
        text_chunks = []
        html_body = None

        if parsed.is_multipart():
            for part in parsed.walk():
                content_type = part.get_content_type()
                if content_type not in ('text/plain', 'text/html'):
                    continue
                # A text file sent as an attachment is not part of the body
                if part.get_content_disposition() == 'attachment':
                    continue
                try:
                    decoded = EmailParser._decode_part(part)
                except Exception:
                    # Deliberate best-effort: one malformed part must not
                    # abort extraction of the remaining parts.
                    continue
                if decoded is None:
                    continue
                if content_type == 'text/plain':
                    text_chunks.append(decoded)
                else:
                    html_body = decoded
        else:
            try:
                decoded = EmailParser._decode_part(parsed)
            except Exception:
                # Deliberate best-effort: fall back to the undecoded payload
                text_chunks.append(str(parsed.get_payload()))
            else:
                if decoded is not None:
                    if parsed.get_content_type() == 'text/html':
                        html_body = decoded
                    else:
                        text_chunks.append(decoded)

        # Strip before applying the placeholder so whitespace-only bodies
        # are reported as empty too.
        text_body = ''.join(text_chunks).strip()
        return (text_body or '(No body content)'), html_body

    @staticmethod
    def is_processed_by_worker(parsed) -> bool:
        """
        Check if email was already processed by our worker (loop detection)

        Args:
            parsed: Parsed email message object

        Returns:
            True if already processed
        """
        # Only skip if OUR header is present. 'Auto-Submitted: auto-replied'
        # alone is not sufficient, and combined with our header it adds no
        # information, so the header value by itself decides the result.
        return bool(parsed.get('X-SES-Worker-Processed', ''))