/**
 * Email parsing utilities
 *
 * Wraps `mailparser` for parsing raw MIME bytes and provides
 * header sanitization (e.g. Microsoft's malformed Message-IDs).
 */

import { simpleParser, type ParsedMail } from 'mailparser';

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * Text and HTML bodies extracted from a parsed email.
 */
export interface BodyParts {
  /** Plain-text body. */
  text: string;
  /** HTML body, or `null` when there is none. */
  html: string | null;
}

// ---------------------------------------------------------------------------
// Parser
// ---------------------------------------------------------------------------

/**
 * Parse raw email bytes into a `ParsedMail` object.
 *
 * The raw message is first passed through `sanitizeRawHeaders`, which
 * rewrites known malformed headers (Microsoft's `<[uuid]@domain>`
 * Message-IDs), and the result is handed to mailparser's `simpleParser`.
 *
 * @param raw - Raw MIME message bytes.
 * @returns The promise produced by `simpleParser` for the sanitized bytes.
 */
export async function parseEmail(raw: Buffer): Promise<ParsedMail> {
  // Pre-sanitize before parsing; the parser never sees the original bytes
  // when a fix was applied.
  const sanitized = sanitizeRawHeaders(raw);
  return simpleParser(sanitized);
}

/**
 * Extract text and HTML body parts from a parsed email.
 *
 * @param parsed - Parsed email; only `text` and `html` are read.
 * @returns The trimmed text body (or the placeholder `(No body content)`
 *   when it is missing or blank) and the HTML body (or `null` when falsy).
 */
export function extractBodyParts(parsed: ParsedMail): BodyParts {
  // `||` rather than `??` is deliberate in both places: an empty or
  // whitespace-only text body must fall back to the placeholder, and a
  // falsy `html` (empty string or `false`) must collapse to `null`.
  const text = parsed.text?.trim() || '(No body content)';
  const html = parsed.html || null;
  return { text, html };
}

/**
 * Check if email was already processed by our worker (loop detection).
 *
 * The only signal used is a truthy `x-ses-worker-processed` header. An
 * `auto-submitted: auto-replied` header on its own is NOT treated as a
 * loop marker: it only ever counted together with the worker header, so
 * it cannot change the outcome and is not consulted.
 *
 * @param parsed - Parsed email whose `headers` map is inspected.
 * @returns `true` when `x-ses-worker-processed` is present and truthy.
 */
export function isProcessedByWorker(parsed: ParsedMail): boolean {
  return Boolean(parsed.headers.get('x-ses-worker-processed'));
}

/**
 * Check if email is a SES MAILER-DAEMON bounce notification.
 *
 * The match is a case-insensitive substring test on the textual `From`
 * value: it must contain both `mailer-daemon@` and `amazonses.com`.
 *
 * @param parsed - Parsed email; only `from.text` is read.
 * @returns `true` when both substrings are present, `false` otherwise
 *   (including when there is no `From` at all).
 */
export function isSesBounceNotification(parsed: ParsedMail): boolean {
  const from = (parsed.from?.text ?? '').toLowerCase();
  return from.includes('mailer-daemon@') && from.includes('amazonses.com');
}

/**
 * Get a header value as a string from mailparser's `headers` map.
 *
 * Lookup is case-insensitive: `name` is lower-cased before the map is
 * queried. Conversion rules, in order:
 *  - missing / `null` value            -> `''`
 *  - string value                      -> returned as is
 *  - object with a `text` property     -> that text (`''` when nullish)
 *  - object with a string `value`      -> that value (structured headers
 *    such as Content-Type — presumably mailparser's `{ value, params }`
 *    shape; confirm against the installed typings)
 *  - anything else (arrays, dates, …)  -> `String(value)`
 *
 * @param parsed - Parsed email whose `headers` map is queried.
 * @param name - Header name, in any letter case.
 * @returns The header rendered as a string; never `null`/`undefined`.
 */
export function getHeader(parsed: ParsedMail, name: string): string {
  const value: unknown = parsed.headers.get(name.toLowerCase());
  if (value === undefined || value === null) return '';
  if (typeof value === 'string') return value;

  if (typeof value === 'object') {
    const record = value as { text?: unknown; value?: unknown };
    if ('text' in record) {
      // Coerce so the declared `string` return type always holds.
      return record.text == null ? '' : String(record.text);
    }
    // Without this, structured headers would stringify to "[object Object]".
    if (typeof record.value === 'string') return record.value;
  }

  return String(value);
}

// ---------------------------------------------------------------------------
// Raw header sanitization
// ---------------------------------------------------------------------------

/**
 * Fix known problematic patterns in raw MIME headers BEFORE parsing.
 *
 * Specifically targets Microsoft's `Message-ID: <[uuid]@domain>`: the
 * square brackets around the local part are stripped, everything else is
 * left byte-for-byte intact.
 *
 * Only the header section is inspected: everything before the first blank
 * line, or the first 8 KiB when no blank line is found.
 *
 * @param raw - Raw MIME message bytes.
 * @returns `raw` itself when nothing needed fixing, otherwise a new buffer
 *   with the corrected header section followed by the untouched remainder.
 */
function sanitizeRawHeaders(raw: Buffer): Buffer {
  const headerEnd = findDoubleNewline(raw);
  const headerLen = headerEnd === -1 ? Math.min(raw.length, 8192) : headerEnd;
  const headerBytes = raw.subarray(0, headerLen);

  // Cheap pre-check: the pattern below cannot match without a '['.
  if (!headerBytes.includes(0x5b)) return raw;

  // latin1 maps every byte to exactly one code unit and back, so headers
  // carrying raw 8-bit data (or a multi-byte sequence cut by the 8 KiB
  // limit) survive the decode/encode round trip unchanged. Decoding as
  // UTF-8 would turn such bytes into U+FFFD and corrupt them on re-encode.
  const headerStr = headerBytes.toString('latin1');

  // Fix: Message-ID with square brackets <[...]@...>
  const fixed = headerStr.replace(
    /^(Message-ID:\s*<?)(\[.*?\])(@[^>]*>?\s*)$/im,
    (_match: string, prefix: string, bracketed: string, suffix: string) =>
      prefix + bracketed.replace(/[[\]]/g, '') + suffix,
  );
  if (fixed === headerStr) return raw;

  return Buffer.concat([Buffer.from(fixed, 'latin1'), raw.subarray(headerLen)]);
}

/**
 * Locate the end of the header section.
 *
 * @param buf - Raw message bytes.
 * @returns Index of the first byte of the first `\r\n\r\n` or `\n\n`
 *   sequence, or `-1` when neither occurs.
 */
function findDoubleNewline(buf: Buffer): number {
  // Stop one short of the end so a trailing "\n\n" is still found. Reads
  // past the end yield `undefined`, which never equals a byte value, so
  // the four-byte CRLF test needs no separate bounds check.
  for (let i = 0; i + 1 < buf.length; i++) {
    if (buf[i] === 0x0a && buf[i + 1] === 0x0a) {
      return i;
    }
    if (
      buf[i] === 0x0d &&
      buf[i + 1] === 0x0a &&
      buf[i + 2] === 0x0d &&
      buf[i + 3] === 0x0a
    ) {
      return i;
    }
  }
  return -1;
}