/**
 * Email parsing utilities
 *
 * Wraps `mailparser` for parsing raw MIME bytes and provides
 * header sanitization (e.g. Microsoft's malformed Message-IDs).
 */

import { simpleParser, type ParsedMail } from 'mailparser';

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** Body content pulled out of a parsed message by `extractBodyParts`. */
export interface BodyParts {
  /** Trimmed plain-text body, or a fixed placeholder when there is none. */
  text: string;
  /** HTML body, or `null` when the message carries no HTML part. */
  html: string | null;
}

// ---------------------------------------------------------------------------
// Parser
// ---------------------------------------------------------------------------

/**
 * Parse raw email bytes into a ParsedMail object.
 *
 * The raw bytes are first passed through `sanitizeRawHeaders` so that known
 * malformed headers (Microsoft's `[uuid]@domain` Message-IDs) are repaired
 * before `simpleParser` sees them.
 *
 * @param raw - Complete raw MIME message.
 * @returns The message as parsed by `mailparser`'s `simpleParser`.
 */
export async function parseEmail(raw: Buffer): Promise<ParsedMail> {
  const sanitized = sanitizeRawHeaders(raw);
  return simpleParser(sanitized);
}

/**
 * Extract text and HTML body parts from a parsed email.
 *
 * @param parsed - Message produced by `parseEmail`.
 * @returns The trimmed text body (or `'(No body content)'` when it is missing
 *   or blank) together with the HTML body (or `null` when it is falsy).
 */
export function extractBodyParts(parsed: ParsedMail): BodyParts {
  // `||` is deliberate here: an empty or whitespace-only body must fall back
  // to the placeholder, which `??` would not do.
  const text = parsed.text?.trim() || '(No body content)';
  const html = parsed.html || null;
  return { text, html };
}

/**
 * Check if email was already processed by our worker (loop detection).
 *
 * @param parsed - Message produced by `parseEmail`.
 * @returns `true` when the `x-ses-worker-processed` header is present with a
 *   truthy value.
 */
export function isProcessedByWorker(parsed: ParsedMail): boolean {
  // An `Auto-Submitted: auto-replied` message only counts as ours when it also
  // carries the marker header, so the marker's presence alone decides.
  return Boolean(parsed.headers.get('x-ses-worker-processed'));
}

/**
 * Check if email is a SES MAILER-DAEMON bounce notification.
 *
 * @param parsed - Message produced by `parseEmail`.
 * @returns `true` when the lower-cased From text contains both
 *   `mailer-daemon@` and `amazonses.com`.
 */
export function isSesBounceNotification(parsed: ParsedMail): boolean {
  const from = (parsed.from?.text ?? '').toLowerCase();
  return from.includes('mailer-daemon@') && from.includes('amazonses.com');
}

/**
 * Get a header value as string. Handles mailparser's headerlines Map.
/* (closes the doc comment that the preceding source line leaves open) */
/**
 * Look up a header on a parsed message and return it as a plain string.
 *
 * @param parsed - Parsed message whose `headers` map is consulted.
 * @param name - Header name; it is lower-cased before the lookup.
 * @returns `''` when the header is absent or null, the value itself when it is
 *   a string, the `text` property (coerced to a string, `''` when nullish) for
 *   object values that have one, and `String(value)` for anything else.
 */
export function getHeader(parsed: ParsedMail, name: string): string {
  const val = parsed.headers.get(name.toLowerCase());
  if (val === undefined || val === null) return '';
  if (typeof val === 'string') return val;
  if (typeof val === 'object' && 'text' in val) {
    // Structured values expose their display form as `text`; never hand a
    // non-string back to callers that were promised a string.
    const text = (val as { text?: unknown }).text;
    if (text === undefined || text === null) return '';
    return typeof text === 'string' ? text : String(text);
  }
  return String(val);
}

// ---------------------------------------------------------------------------
// Raw header sanitization
// ---------------------------------------------------------------------------

/**
 * Fix known problematic patterns in raw MIME headers BEFORE parsing.
 *
 * Specifically targets Microsoft's `Message-ID: <[uuid]@domain>` form: the
 * square brackets inside the id are removed, everything else is left alone.
 *
 * Only the header section (up to the first blank line) is inspected; when no
 * blank line exists, at most the first 8192 bytes are examined.
 *
 * @param raw - Complete raw MIME message.
 * @returns `raw` itself when nothing needed fixing, otherwise a new buffer
 *   with the repaired header section followed by the untouched remainder.
 */
function sanitizeRawHeaders(raw: Buffer): Buffer {
  const maxHeaderScanBytes = 8192;

  // Groups: 1 = "Message-ID:" + whitespace + optional "<",
  //         2 = the id, which must start with a "[...]" part,
  //         3 = optional ">" and trailing whitespace up to the line end.
  const bracketedMessageId = /^(Message-ID:\s*<?)(\[[^\]]*\][^>\r\n]*)(>?\s*)$/im;

  const headerEnd = findDoubleNewline(raw);
  const headerLen = headerEnd === -1 ? Math.min(raw.length, maxHeaderScanBytes) : headerEnd;

  // latin1 maps every byte to exactly one code unit and back, so re-encoding
  // the edited headers cannot corrupt 8-bit data or a multi-byte character
  // that happens to straddle the scan limit.
  const headerStr = raw.subarray(0, headerLen).toString('latin1');

  // Cheap pre-check: the pattern cannot match without an opening bracket.
  if (!headerStr.includes('[')) return raw;

  const fixed = headerStr.replace(
    bracketedMessageId,
    (_match: string, prefix: string, bracketed: string, suffix: string) =>
      prefix + bracketed.replace(/[[\]]/g, '') + suffix,
  );
  if (fixed === headerStr) return raw;

  return Buffer.concat([Buffer.from(fixed, 'latin1'), raw.subarray(headerLen)]);
}

/**
 * Locate the blank line that separates headers from the body.
 *
 * @param buf - Raw message bytes.
 * @returns Offset of the first byte of the earliest `\r\n\r\n` or `\n\n`
 *   sequence, or `-1` when neither occurs.
 */
function findDoubleNewline(buf: Buffer): number {
  const crlfAt = buf.indexOf('\r\n\r\n');
  // A bare "\n\n" only wins if it starts before the CRLF separator, so there
  // is no need to look past that point.
  const lfSearchArea = crlfAt === -1 ? buf : buf.subarray(0, crlfAt);
  const lfAt = lfSearchArea.indexOf('\n\n');
  return lfAt !== -1 ? lfAt : crlfAt;
}