extractEnvelopeRecipients

This commit is contained in:
Andreas Knuth 2025-10-18 20:25:20 -05:00
parent 655050bf46
commit 6767d45aec
1 changed files with 83 additions and 28 deletions

View File

@ -19,11 +19,9 @@ function parseDate(dateInput: string | Date | undefined | null): Date | null {
try {
const date = dateInput instanceof Date ? dateInput : new Date(dateInput);
// Prüfe ob das Datum gültig ist
if (isNaN(date.getTime())) {
return null;
}
// Prüfe ob das Datum in einem vernünftigen Bereich liegt (1970-2100)
const year = date.getFullYear();
if (year < 1970 || year > 2100) {
return null;
@ -35,6 +33,42 @@ function parseDate(dateInput: string | Date | undefined | null): Date | null {
}
}
// Helper Funktion für Address-Extraktion aus Header-Feldern
function extractAddresses(addressObj: any): string[] {
if (!addressObj) return [];
if (Array.isArray(addressObj)) {
return addressObj.flatMap(t => t.value.map((v: any) => v.address?.toLowerCase() || '')).filter(Boolean);
}
return addressObj.value?.map((v: any) => v.address?.toLowerCase() || '').filter(Boolean) || [];
}
// NEUE FUNKTION: Extrahiere Envelope Recipients aus Received Headers
function extractEnvelopeRecipients(parsed: any): string[] {
const envelopeRecipients: string[] = [];
const receivedHeaders = parsed.headers.get('received');
if (receivedHeaders) {
const receivedArray = Array.isArray(receivedHeaders) ? receivedHeaders : [receivedHeaders];
for (const received of receivedArray) {
const receivedStr = typeof received === 'string' ? received : received.toString();
// Regex für "for <email@domain>" oder "for email@domain"
const forMatches = receivedStr.match(/\bfor\s+<?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})>?/gi);
if (forMatches) {
forMatches.forEach(match => {
const emailMatch = match.match(/([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/i);
if (emailMatch && emailMatch[1]) {
envelopeRecipients.push(emailMatch[1].toLowerCase());
}
});
}
}
}
// Dedupliziere
return [...new Set(envelopeRecipients)];
}
export async function syncAllDomains() {
console.log('Starting optimized syncAllDomains...');
const startTime = Date.now();
@ -52,11 +86,9 @@ export async function syncAllDomains() {
const bucket = bucketObj.Name!;
// Korrekte Domain-Konvertierung: Nur den letzten '-' vor '-emails' zu '.' machen
// Beispiel: bayarea-cc-com-emails -> bayarea-cc-com -> bayarea-cc.com
let domainName = bucket.replace('-emails', ''); // Entferne '-emails'
const lastDashIndex = domainName.lastIndexOf('-'); // Finde letzten Bindestrich
let domainName = bucket.replace('-emails', '');
const lastDashIndex = domainName.lastIndexOf('-');
if (lastDashIndex !== -1) {
// Ersetze nur den letzten Bindestrich durch einen Punkt
domainName = domainName.substring(0, lastDashIndex) + '.' + domainName.substring(lastDashIndex + 1);
}
@ -84,7 +116,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
console.log(`Starting optimized sync for bucket: ${bucket}`);
const startTime = Date.now();
// 1. Hole alle S3 Keys auf einmal
let allS3Keys: string[] = [];
let continuationToken: string | undefined;
@ -106,7 +137,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
if (allS3Keys.length === 0) return;
// 2. Hole alle existierenden E-Mails aus der DB in einem Query
const existingEmails = await db
.select({
s3Key: emails.s3Key,
@ -121,7 +151,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
console.log(`Found ${existingEmails.length} existing emails in DB`);
// 3. Bestimme was zu tun ist
const toInsert: string[] = [];
const toUpdate: string[] = [];
@ -135,7 +164,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
console.log(`To insert: ${toInsert.length}, To update: ${toUpdate.length}`);
// 4. Parallele Verarbeitung der Updates (Metadaten)
if (toUpdate.length > 0) {
const updateLimit = pLimit(CONCURRENT_S3_OPERATIONS);
const updatePromises = toUpdate.map(key =>
@ -169,7 +197,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
await Promise.all(updatePromises);
}
// 5. Batch-Insert für neue E-Mails
if (toInsert.length > 0) {
console.log(`Processing ${toInsert.length} new emails...`);
@ -194,24 +221,34 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
});
const metadata = headResponse.Metadata || {};
const to = extractAddresses(parsed.to);
const cc = extractAddresses(parsed.cc);
const bcc = extractAddresses(parsed.bcc);
// Versuche verschiedene Datum-Quellen in Reihenfolge der Präferenz
// NEUE LOGIK: Extrahiere Envelope Recipients aus Received Headers
let to: string[] = [];
let cc: string[] = [];
let bcc: string[] = [];
const envelopeRecipients = extractEnvelopeRecipients(parsed);
if (envelopeRecipients.length > 0) {
to = envelopeRecipients;
console.log(`[${key}] Using ${to.length} envelope recipient(s): ${to.join(', ')}`);
} else {
to = extractAddresses(parsed.to);
cc = extractAddresses(parsed.cc);
bcc = extractAddresses(parsed.bcc);
console.log(`[${key}] No envelope recipients found, using header recipients`);
}
let emailDate: Date | null = null;
// 1. Versuche parsed.date
if (parsed.date) {
emailDate = parseDate(parsed.date);
}
// 2. Falls parsed.date ungültig, versuche S3 LastModified
if (!emailDate && headResponse.LastModified) {
emailDate = parseDate(headResponse.LastModified);
}
// 3. Falls beides ungültig, verwende aktuelles Datum als Fallback
if (!emailDate) {
console.warn(`No valid date found for ${key}, using current date`);
emailDate = new Date();
@ -244,20 +281,38 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
const emailData = (await Promise.all(emailDataPromises)).filter(Boolean);
if (emailData.length > 0) {
await db.insert(emails).values(emailData);
console.log(`Inserted ${emailData.length} emails`);
const validatedData = emailData.map(email => {
return {
...email,
date: email!.date || new Date(),
processedAt: email!.processedAt || null,
};
});
try {
await db.insert(emails).values(validatedData);
console.log(`Inserted ${validatedData.length} emails`);
} catch (dbError) {
console.error('Database insert error:', dbError);
console.log('Trying individual inserts to identify problematic email...');
for (const email of validatedData) {
try {
await db.insert(emails).values([email]);
console.log(`Successfully inserted: ${email!.s3Key}`);
} catch (singleError) {
console.error(`Failed to insert ${email!.s3Key}:`, singleError);
console.error('Problematic email data:', JSON.stringify({
s3Key: email!.s3Key,
date: email!.date,
processedAt: email!.processedAt,
}));
}
}
}
}
}
}
const duration = (Date.now() - startTime) / 1000;
console.log(`Sync for ${bucket} completed in ${duration}s`);
}
function extractAddresses(addressObj: any): string[] {
if (!addressObj) return [];
if (Array.isArray(addressObj)) {
return addressObj.flatMap(t => t.value.map((v: any) => v.address?.toLowerCase() || '')).filter(Boolean);
}
return addressObj.value?.map((v: any) => v.address?.toLowerCase() || '').filter(Boolean) || [];
}