From 6767d45aec56d45a716e15d7481dac82881576c2 Mon Sep 17 00:00:00 2001 From: Andreas Knuth Date: Sat, 18 Oct 2025 20:25:20 -0500 Subject: [PATCH] extractEnvelopeRecipients --- app/lib/sync.ts | 111 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 28 deletions(-) diff --git a/app/lib/sync.ts b/app/lib/sync.ts index d24a7b2..fb5d1a2 100644 --- a/app/lib/sync.ts +++ b/app/lib/sync.ts @@ -19,11 +19,9 @@ function parseDate(dateInput: string | Date | undefined | null): Date | null { try { const date = dateInput instanceof Date ? dateInput : new Date(dateInput); - // Prüfe ob das Datum gültig ist if (isNaN(date.getTime())) { return null; } - // Prüfe ob das Datum in einem vernünftigen Bereich liegt (1970-2100) const year = date.getFullYear(); if (year < 1970 || year > 2100) { return null; @@ -35,6 +33,42 @@ function parseDate(dateInput: string | Date | undefined | null): Date | null { } } +// Helper Funktion für Address-Extraktion aus Header-Feldern +function extractAddresses(addressObj: any): string[] { + if (!addressObj) return []; + if (Array.isArray(addressObj)) { + return addressObj.flatMap(t => t.value.map((v: any) => v.address?.toLowerCase() || '')).filter(Boolean); + } + return addressObj.value?.map((v: any) => v.address?.toLowerCase() || '').filter(Boolean) || []; +} + +// NEUE FUNKTION: Extrahiere Envelope Recipients aus Received Headers +function extractEnvelopeRecipients(parsed: any): string[] { + const envelopeRecipients: string[] = []; + const receivedHeaders = parsed.headers.get('received'); + + if (receivedHeaders) { + const receivedArray = Array.isArray(receivedHeaders) ? receivedHeaders : [receivedHeaders]; + + for (const received of receivedArray) { + const receivedStr = typeof received === 'string' ? received : received.toString(); + // Regex für "for " oder "for email@domain" + const forMatches = receivedStr.match(/\bfor\s+?/gi); + if (forMatches) { + forMatches.forEach(match => { + const emailMatch = match.match(/([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/i); + if (emailMatch && emailMatch[1]) { + envelopeRecipients.push(emailMatch[1].toLowerCase()); + } + }); + } + } + } + + // Dedupliziere + return [...new Set(envelopeRecipients)]; +} + export async function syncAllDomains() { console.log('Starting optimized syncAllDomains...'); const startTime = Date.now(); @@ -52,11 +86,9 @@ export async function syncAllDomains() { const bucket = bucketObj.Name!; // Korrekte Domain-Konvertierung: Nur den letzten '-' vor '-emails' zu '.' machen - // Beispiel: bayarea-cc-com-emails -> bayarea-cc-com -> bayarea-cc.com - let domainName = bucket.replace('-emails', ''); // Entferne '-emails' - const lastDashIndex = domainName.lastIndexOf('-'); // Finde letzten Bindestrich + let domainName = bucket.replace('-emails', ''); + const lastDashIndex = domainName.lastIndexOf('-'); if (lastDashIndex !== -1) { - // Ersetze nur den letzten Bindestrich durch einen Punkt domainName = domainName.substring(0, lastDashIndex) + '.' + domainName.substring(lastDashIndex + 1); } @@ -84,7 +116,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3 console.log(`Starting optimized sync for bucket: ${bucket}`); const startTime = Date.now(); - // 1. Hole alle S3 Keys auf einmal let allS3Keys: string[] = []; let continuationToken: string | undefined; @@ -106,7 +137,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3 if (allS3Keys.length === 0) return; - // 2. Hole alle existierenden E-Mails aus der DB in einem Query const existingEmails = await db .select({ s3Key: emails.s3Key, @@ -121,7 +151,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3 console.log(`Found ${existingEmails.length} existing emails in DB`); - // 3. Bestimme was zu tun ist const toInsert: string[] = []; const toUpdate: string[] = []; @@ -135,7 +164,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3 console.log(`To insert: ${toInsert.length}, To update: ${toUpdate.length}`); - // 4. Parallele Verarbeitung der Updates (Metadaten) if (toUpdate.length > 0) { const updateLimit = pLimit(CONCURRENT_S3_OPERATIONS); const updatePromises = toUpdate.map(key => @@ -169,7 +197,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3 await Promise.all(updatePromises); } - // 5. Batch-Insert für neue E-Mails if (toInsert.length > 0) { console.log(`Processing ${toInsert.length} new emails...`); @@ -194,24 +221,34 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3 }); const metadata = headResponse.Metadata || {}; - const to = extractAddresses(parsed.to); - const cc = extractAddresses(parsed.cc); - const bcc = extractAddresses(parsed.bcc); - // Versuche verschiedene Datum-Quellen in Reihenfolge der Präferenz + // NEUE LOGIK: Extrahiere Envelope Recipients aus Received Headers + let to: string[] = []; + let cc: string[] = []; + let bcc: string[] = []; + + const envelopeRecipients = extractEnvelopeRecipients(parsed); + + if (envelopeRecipients.length > 0) { + to = envelopeRecipients; + console.log(`[${key}] Using ${to.length} envelope recipient(s): ${to.join(', ')}`); + } else { + to = extractAddresses(parsed.to); + cc = extractAddresses(parsed.cc); + bcc = extractAddresses(parsed.bcc); + console.log(`[${key}] No envelope recipients found, using header recipients`); + } + let emailDate: Date | null = null; - // 1. Versuche parsed.date if (parsed.date) { emailDate = parseDate(parsed.date); } - // 2. Falls parsed.date ungültig, versuche S3 LastModified if (!emailDate && headResponse.LastModified) { emailDate = parseDate(headResponse.LastModified); } - // 3. Falls beides ungültig, verwende aktuelles Datum als Fallback if (!emailDate) { console.warn(`No valid date found for ${key}, using current date`); emailDate = new Date(); @@ -244,20 +281,38 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3 const emailData = (await Promise.all(emailDataPromises)).filter(Boolean); if (emailData.length > 0) { - await db.insert(emails).values(emailData); - console.log(`Inserted ${emailData.length} emails`); + const validatedData = emailData.map(email => { + return { + ...email, + date: email!.date || new Date(), + processedAt: email!.processedAt || null, + }; + }); + + try { + await db.insert(emails).values(validatedData); + console.log(`Inserted ${validatedData.length} emails`); + } catch (dbError) { + console.error('Database insert error:', dbError); + console.log('Trying individual inserts to identify problematic email...'); + for (const email of validatedData) { + try { + await db.insert(emails).values([email]); + console.log(`Successfully inserted: ${email!.s3Key}`); + } catch (singleError) { + console.error(`Failed to insert ${email!.s3Key}:`, singleError); + console.error('Problematic email data:', JSON.stringify({ + s3Key: email!.s3Key, + date: email!.date, + processedAt: email!.processedAt, + })); + } + } + } } } } const duration = (Date.now() - startTime) / 1000; console.log(`Sync for ${bucket} completed in ${duration}s`); -} - -function extractAddresses(addressObj: any): string[] { - if (!addressObj) return []; - if (Array.isArray(addressObj)) { - return addressObj.flatMap(t => t.value.map((v: any) => v.address?.toLowerCase() || '')).filter(Boolean); - } - return addressObj.value?.map((v: any) => v.address?.toLowerCase() || '').filter(Boolean) || []; } \ No newline at end of file