extractEnvelopeRecipients
This commit is contained in:
parent
655050bf46
commit
6767d45aec
111
app/lib/sync.ts
111
app/lib/sync.ts
|
|
@ -19,11 +19,9 @@ function parseDate(dateInput: string | Date | undefined | null): Date | null {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const date = dateInput instanceof Date ? dateInput : new Date(dateInput);
|
const date = dateInput instanceof Date ? dateInput : new Date(dateInput);
|
||||||
// Prüfe ob das Datum gültig ist
|
|
||||||
if (isNaN(date.getTime())) {
|
if (isNaN(date.getTime())) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
// Prüfe ob das Datum in einem vernünftigen Bereich liegt (1970-2100)
|
|
||||||
const year = date.getFullYear();
|
const year = date.getFullYear();
|
||||||
if (year < 1970 || year > 2100) {
|
if (year < 1970 || year > 2100) {
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -35,6 +33,42 @@ function parseDate(dateInput: string | Date | undefined | null): Date | null {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper Funktion für Address-Extraktion aus Header-Feldern
|
||||||
|
function extractAddresses(addressObj: any): string[] {
|
||||||
|
if (!addressObj) return [];
|
||||||
|
if (Array.isArray(addressObj)) {
|
||||||
|
return addressObj.flatMap(t => t.value.map((v: any) => v.address?.toLowerCase() || '')).filter(Boolean);
|
||||||
|
}
|
||||||
|
return addressObj.value?.map((v: any) => v.address?.toLowerCase() || '').filter(Boolean) || [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// NEUE FUNKTION: Extrahiere Envelope Recipients aus Received Headers
|
||||||
|
function extractEnvelopeRecipients(parsed: any): string[] {
|
||||||
|
const envelopeRecipients: string[] = [];
|
||||||
|
const receivedHeaders = parsed.headers.get('received');
|
||||||
|
|
||||||
|
if (receivedHeaders) {
|
||||||
|
const receivedArray = Array.isArray(receivedHeaders) ? receivedHeaders : [receivedHeaders];
|
||||||
|
|
||||||
|
for (const received of receivedArray) {
|
||||||
|
const receivedStr = typeof received === 'string' ? received : received.toString();
|
||||||
|
// Regex für "for <email@domain>" oder "for email@domain"
|
||||||
|
const forMatches = receivedStr.match(/\bfor\s+<?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})>?/gi);
|
||||||
|
if (forMatches) {
|
||||||
|
forMatches.forEach(match => {
|
||||||
|
const emailMatch = match.match(/([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/i);
|
||||||
|
if (emailMatch && emailMatch[1]) {
|
||||||
|
envelopeRecipients.push(emailMatch[1].toLowerCase());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dedupliziere
|
||||||
|
return [...new Set(envelopeRecipients)];
|
||||||
|
}
|
||||||
|
|
||||||
export async function syncAllDomains() {
|
export async function syncAllDomains() {
|
||||||
console.log('Starting optimized syncAllDomains...');
|
console.log('Starting optimized syncAllDomains...');
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
@ -52,11 +86,9 @@ export async function syncAllDomains() {
|
||||||
const bucket = bucketObj.Name!;
|
const bucket = bucketObj.Name!;
|
||||||
|
|
||||||
// Korrekte Domain-Konvertierung: Nur den letzten '-' vor '-emails' zu '.' machen
|
// Korrekte Domain-Konvertierung: Nur den letzten '-' vor '-emails' zu '.' machen
|
||||||
// Beispiel: bayarea-cc-com-emails -> bayarea-cc-com -> bayarea-cc.com
|
let domainName = bucket.replace('-emails', '');
|
||||||
let domainName = bucket.replace('-emails', ''); // Entferne '-emails'
|
const lastDashIndex = domainName.lastIndexOf('-');
|
||||||
const lastDashIndex = domainName.lastIndexOf('-'); // Finde letzten Bindestrich
|
|
||||||
if (lastDashIndex !== -1) {
|
if (lastDashIndex !== -1) {
|
||||||
// Ersetze nur den letzten Bindestrich durch einen Punkt
|
|
||||||
domainName = domainName.substring(0, lastDashIndex) + '.' + domainName.substring(lastDashIndex + 1);
|
domainName = domainName.substring(0, lastDashIndex) + '.' + domainName.substring(lastDashIndex + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -84,7 +116,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
|
||||||
console.log(`Starting optimized sync for bucket: ${bucket}`);
|
console.log(`Starting optimized sync for bucket: ${bucket}`);
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
// 1. Hole alle S3 Keys auf einmal
|
|
||||||
let allS3Keys: string[] = [];
|
let allS3Keys: string[] = [];
|
||||||
let continuationToken: string | undefined;
|
let continuationToken: string | undefined;
|
||||||
|
|
||||||
|
|
@ -106,7 +137,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
|
||||||
|
|
||||||
if (allS3Keys.length === 0) return;
|
if (allS3Keys.length === 0) return;
|
||||||
|
|
||||||
// 2. Hole alle existierenden E-Mails aus der DB in einem Query
|
|
||||||
const existingEmails = await db
|
const existingEmails = await db
|
||||||
.select({
|
.select({
|
||||||
s3Key: emails.s3Key,
|
s3Key: emails.s3Key,
|
||||||
|
|
@ -121,7 +151,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
|
||||||
|
|
||||||
console.log(`Found ${existingEmails.length} existing emails in DB`);
|
console.log(`Found ${existingEmails.length} existing emails in DB`);
|
||||||
|
|
||||||
// 3. Bestimme was zu tun ist
|
|
||||||
const toInsert: string[] = [];
|
const toInsert: string[] = [];
|
||||||
const toUpdate: string[] = [];
|
const toUpdate: string[] = [];
|
||||||
|
|
||||||
|
|
@ -135,7 +164,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
|
||||||
|
|
||||||
console.log(`To insert: ${toInsert.length}, To update: ${toUpdate.length}`);
|
console.log(`To insert: ${toInsert.length}, To update: ${toUpdate.length}`);
|
||||||
|
|
||||||
// 4. Parallele Verarbeitung der Updates (Metadaten)
|
|
||||||
if (toUpdate.length > 0) {
|
if (toUpdate.length > 0) {
|
||||||
const updateLimit = pLimit(CONCURRENT_S3_OPERATIONS);
|
const updateLimit = pLimit(CONCURRENT_S3_OPERATIONS);
|
||||||
const updatePromises = toUpdate.map(key =>
|
const updatePromises = toUpdate.map(key =>
|
||||||
|
|
@ -169,7 +197,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
|
||||||
await Promise.all(updatePromises);
|
await Promise.all(updatePromises);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5. Batch-Insert für neue E-Mails
|
|
||||||
if (toInsert.length > 0) {
|
if (toInsert.length > 0) {
|
||||||
console.log(`Processing ${toInsert.length} new emails...`);
|
console.log(`Processing ${toInsert.length} new emails...`);
|
||||||
|
|
||||||
|
|
@ -194,24 +221,34 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
|
||||||
});
|
});
|
||||||
|
|
||||||
const metadata = headResponse.Metadata || {};
|
const metadata = headResponse.Metadata || {};
|
||||||
const to = extractAddresses(parsed.to);
|
|
||||||
const cc = extractAddresses(parsed.cc);
|
|
||||||
const bcc = extractAddresses(parsed.bcc);
|
|
||||||
|
|
||||||
// Versuche verschiedene Datum-Quellen in Reihenfolge der Präferenz
|
// NEUE LOGIK: Extrahiere Envelope Recipients aus Received Headers
|
||||||
|
let to: string[] = [];
|
||||||
|
let cc: string[] = [];
|
||||||
|
let bcc: string[] = [];
|
||||||
|
|
||||||
|
const envelopeRecipients = extractEnvelopeRecipients(parsed);
|
||||||
|
|
||||||
|
if (envelopeRecipients.length > 0) {
|
||||||
|
to = envelopeRecipients;
|
||||||
|
console.log(`[${key}] Using ${to.length} envelope recipient(s): ${to.join(', ')}`);
|
||||||
|
} else {
|
||||||
|
to = extractAddresses(parsed.to);
|
||||||
|
cc = extractAddresses(parsed.cc);
|
||||||
|
bcc = extractAddresses(parsed.bcc);
|
||||||
|
console.log(`[${key}] No envelope recipients found, using header recipients`);
|
||||||
|
}
|
||||||
|
|
||||||
let emailDate: Date | null = null;
|
let emailDate: Date | null = null;
|
||||||
|
|
||||||
// 1. Versuche parsed.date
|
|
||||||
if (parsed.date) {
|
if (parsed.date) {
|
||||||
emailDate = parseDate(parsed.date);
|
emailDate = parseDate(parsed.date);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Falls parsed.date ungültig, versuche S3 LastModified
|
|
||||||
if (!emailDate && headResponse.LastModified) {
|
if (!emailDate && headResponse.LastModified) {
|
||||||
emailDate = parseDate(headResponse.LastModified);
|
emailDate = parseDate(headResponse.LastModified);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Falls beides ungültig, verwende aktuelles Datum als Fallback
|
|
||||||
if (!emailDate) {
|
if (!emailDate) {
|
||||||
console.warn(`No valid date found for ${key}, using current date`);
|
console.warn(`No valid date found for ${key}, using current date`);
|
||||||
emailDate = new Date();
|
emailDate = new Date();
|
||||||
|
|
@ -244,20 +281,38 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
|
||||||
const emailData = (await Promise.all(emailDataPromises)).filter(Boolean);
|
const emailData = (await Promise.all(emailDataPromises)).filter(Boolean);
|
||||||
|
|
||||||
if (emailData.length > 0) {
|
if (emailData.length > 0) {
|
||||||
await db.insert(emails).values(emailData);
|
const validatedData = emailData.map(email => {
|
||||||
console.log(`Inserted ${emailData.length} emails`);
|
return {
|
||||||
|
...email,
|
||||||
|
date: email!.date || new Date(),
|
||||||
|
processedAt: email!.processedAt || null,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
await db.insert(emails).values(validatedData);
|
||||||
|
console.log(`Inserted ${validatedData.length} emails`);
|
||||||
|
} catch (dbError) {
|
||||||
|
console.error('Database insert error:', dbError);
|
||||||
|
console.log('Trying individual inserts to identify problematic email...');
|
||||||
|
for (const email of validatedData) {
|
||||||
|
try {
|
||||||
|
await db.insert(emails).values([email]);
|
||||||
|
console.log(`Successfully inserted: ${email!.s3Key}`);
|
||||||
|
} catch (singleError) {
|
||||||
|
console.error(`Failed to insert ${email!.s3Key}:`, singleError);
|
||||||
|
console.error('Problematic email data:', JSON.stringify({
|
||||||
|
s3Key: email!.s3Key,
|
||||||
|
date: email!.date,
|
||||||
|
processedAt: email!.processedAt,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const duration = (Date.now() - startTime) / 1000;
|
const duration = (Date.now() - startTime) / 1000;
|
||||||
console.log(`Sync for ${bucket} completed in ${duration}s`);
|
console.log(`Sync for ${bucket} completed in ${duration}s`);
|
||||||
}
|
|
||||||
|
|
||||||
function extractAddresses(addressObj: any): string[] {
|
|
||||||
if (!addressObj) return [];
|
|
||||||
if (Array.isArray(addressObj)) {
|
|
||||||
return addressObj.flatMap(t => t.value.map((v: any) => v.address?.toLowerCase() || '')).filter(Boolean);
|
|
||||||
}
|
|
||||||
return addressObj.value?.map((v: any) => v.address?.toLowerCase() || '').filter(Boolean) || [];
|
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue