This commit is contained in:
Andreas Knuth 2025-10-16 17:48:29 -05:00
parent eeac2c6904
commit 45a61a032c
8 changed files with 168 additions and 373 deletions

View File

@ -1,12 +1,8 @@
import { NextRequest, NextResponse } from 'next/server'; import { NextRequest, NextResponse } from 'next/server';
import { db } from '@/app/db/drizzle'; import { db } from '@/app/db/drizzle';
import { emails } from '@/app/db/schema'; import { emails } from '@/app/db/schema';
import { authenticate, getBody } from '@/app/lib/utils'; import { authenticate } from '@/app/lib/utils';
import { eq } from 'drizzle-orm'; import { eq } from 'drizzle-orm';
import { CopyObjectCommand, GetObjectCommand, HeadObjectCommand } from '@aws-sdk/client-s3';
import { getS3Client } from '@/app/lib/utils';
import nodemailer from 'nodemailer';
import { Readable } from 'stream';
export async function GET(req: NextRequest) { export async function GET(req: NextRequest) {
if (!authenticate(req)) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); if (!authenticate(req)) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
@ -26,67 +22,9 @@ export async function GET(req: NextRequest) {
html: email.html, html: email.html,
raw: email.raw, raw: email.raw,
processed: email.processed ? 'true' : 'false', processed: email.processed ? 'true' : 'false',
processedAt: email.processedAt?.toISOString() || null,
processedBy: email.processedBy,
queuedTo: email.queuedTo,
status: email.status,
}); });
} }
/**
 * PUT: set the "processed" flag of one stored e-mail, both in the S3 object's
 * user metadata and in the `emails` table.
 *
 * Expects a JSON body `{ bucket, key, processed }`. `processed` may be the
 * string 'true' / 'false' (what the UI sends) or a boolean; it is normalised
 * to a string because S3 user metadata values are strings.
 *
 * Responses: 401 when unauthenticated, 400 when a parameter is missing,
 * 500 when the metadata key is not configured or S3 / the DB fails,
 * otherwise `{ success: true }`.
 */
export async function PUT(req: NextRequest) {
  if (!authenticate(req)) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });

  const { bucket, key, processed } = await req.json();
  if (!bucket || !key) return NextResponse.json({ error: 'Missing params' }, { status: 400 });
  if (typeof processed !== 'string' && typeof processed !== 'boolean') {
    return NextResponse.json({ error: 'Missing params' }, { status: 400 });
  }

  // Without this guard a missing env var would create a metadata entry literally named "undefined".
  const metaKey = process.env.PROCESSED_META_KEY;
  if (!metaKey) return NextResponse.json({ error: 'PROCESSED_META_KEY is not configured' }, { status: 500 });

  const processedValue = String(processed);

  try {
    const s3 = getS3Client();
    const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key }));
    const newMeta = { ...head.Metadata, [metaKey]: processedValue };

    // S3 requires CopySource to be URL-encoded; encode each path segment so the
    // "/" separators survive while spaces, "+", "%" etc. in the key are escaped.
    const copySource = `${bucket}/${String(key).split('/').map(encodeURIComponent).join('/')}`;

    // An in-place copy with MetadataDirective REPLACE is how S3 rewrites object metadata.
    await s3.send(new CopyObjectCommand({
      Bucket: bucket,
      Key: key,
      CopySource: copySource,
      Metadata: newMeta,
      MetadataDirective: 'REPLACE'
    }));

    // NOTE(review): the row is matched by S3 key only, not by bucket/domain — confirm keys are unique across buckets.
    await db.update(emails).set({ processed: processedValue === 'true' }).where(eq(emails.s3Key, key));
    return NextResponse.json({ success: true });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
/**
 * POST: re-send one stored e-mail through SMTP, then mark it as processed in
 * the S3 object's user metadata and in the `emails` table.
 *
 * Expects a JSON body `{ bucket, key }`.
 *
 * Responses: 401 when unauthenticated, 400 when a parameter is missing,
 * 500 when the metadata env vars are not configured or when S3, SMTP or the
 * DB fails (the error message is returned), otherwise
 * `{ message: 'Resent successfully' }`.
 */
export async function POST(req: NextRequest) {
  if (!authenticate(req)) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });

  const { bucket, key } = await req.json();
  if (!bucket || !key) return NextResponse.json({ error: 'Missing params' }, { status: 400 });

  // Fail before sending anything if the marker cannot be written afterwards;
  // otherwise the mail would be sent but never flagged as processed.
  const metaKey = process.env.PROCESSED_META_KEY;
  const metaValue = process.env.PROCESSED_META_VALUE;
  if (!metaKey || !metaValue) {
    return NextResponse.json({ error: 'PROCESSED_META_KEY / PROCESSED_META_VALUE are not configured' }, { status: 500 });
  }

  try {
    const s3 = getS3Client();

    // Fetching the object happens inside the try block so that a missing key
    // or an S3 outage yields the JSON error response below.
    const { Body } = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
    const raw = await getBody(Body as Readable);

    const transporter = nodemailer.createTransport({
      host: process.env.SMTP_HOST,
      port: Number(process.env.SMTP_PORT),
      secure: false,
      auth: { user: process.env.SMTP_USER, pass: process.env.SMTP_PASS },
      // SECURITY NOTE(review): certificate verification is disabled here. Kept as-is
      // because the SMTP host may use a self-signed certificate — confirm and enable if possible.
      tls: { rejectUnauthorized: false }
    });

    // NOTE(review): only `raw` is passed, no explicit envelope — confirm the
    // recipients are resolved as intended by the SMTP setup.
    await transporter.sendMail({ raw });

    // Update S3 metadata: merge the processed marker into the existing user metadata.
    const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key }));
    const newMeta = { ...head.Metadata, [metaKey]: metaValue };

    // S3 requires CopySource to be URL-encoded; encode each path segment so the
    // "/" separators survive while special characters in the key are escaped.
    const copySource = `${bucket}/${String(key).split('/').map(encodeURIComponent).join('/')}`;

    await s3.send(new CopyObjectCommand({
      Bucket: bucket,
      Key: key,
      CopySource: copySource,
      Metadata: newMeta,
      MetadataDirective: 'REPLACE'
    }));

    // Update DB
    await db.update(emails).set({ processed: true }).where(eq(emails.s3Key, key));
    return NextResponse.json({ message: 'Resent successfully' });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}

View File

@ -20,7 +20,20 @@ export async function GET(req: NextRequest) {
subject: emails.subject, subject: emails.subject,
date: emails.date, date: emails.date,
processed: emails.processed, processed: emails.processed,
processedAt: emails.processedAt,
processedBy: emails.processedBy,
queuedTo: emails.queuedTo,
status: emails.status,
}).from(emails).where(sql`${mailbox} = ANY(${emails.to}) AND ${emails.domainId} = ${domain.id}`); }).from(emails).where(sql`${mailbox} = ANY(${emails.to}) AND ${emails.domainId} = ${domain.id}`);
return NextResponse.json(emailList.map(e => ({ key: e.key, subject: e.subject, date: e.date?.toISOString(), processed: e.processed ? 'true' : 'false' }))); return NextResponse.json(emailList.map(e => ({
key: e.key,
subject: e.subject,
date: e.date?.toISOString(),
processed: e.processed ? 'true' : 'false',
processedAt: e.processedAt?.toISOString() || null,
processedBy: e.processedBy,
queuedTo: e.queuedTo,
status: e.status,
})));
} }

View File

@ -2,7 +2,7 @@ import { NextRequest, NextResponse } from 'next/server';
import { db } from '@/app/db/drizzle'; import { db } from '@/app/db/drizzle';
import { domains, emails } from '@/app/db/schema'; import { domains, emails } from '@/app/db/schema';
import { authenticate } from '@/app/lib/utils'; import { authenticate } from '@/app/lib/utils';
import { eq, sql } from 'drizzle-orm'; import { eq } from 'drizzle-orm';
export async function GET(req: NextRequest) { export async function GET(req: NextRequest) {
if (!authenticate(req)) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); if (!authenticate(req)) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
@ -14,9 +14,24 @@ export async function GET(req: NextRequest) {
const [domain] = await db.select().from(domains).where(eq(domains.bucket, bucket)); const [domain] = await db.select().from(domains).where(eq(domains.bucket, bucket));
if (!domain) return NextResponse.json({ error: 'Domain not found' }, { status: 404 }); if (!domain) return NextResponse.json({ error: 'Domain not found' }, { status: 404 });
// Fetch the "to" recipient lists of every e-mail stored for this domain
const mailboxData = await db.select({ to: emails.to }).from(emails).where(eq(emails.domainId, domain.id)); const mailboxData = await db.select({ to: emails.to }).from(emails).where(eq(emails.domainId, domain.id));
const uniqueMailboxes = new Set<string>();
mailboxData.forEach(em => em.to?.forEach(r => uniqueMailboxes.add(r.toLowerCase())));
return NextResponse.json(Array.from(uniqueMailboxes)); // Extrahiere die Domain aus dem Bucket-Namen (z.B. "example-com-emails" -> "example.com")
const domainName = bucket.replace('-emails', '').replace(/-/g, '.');
const uniqueMailboxes = new Set<string>();
// Keep only the e-mail addresses that belong to the current domain
mailboxData.forEach(em => {
em.to?.forEach(recipient => {
const recipientLower = recipient.toLowerCase();
// Check whether the e-mail address belongs to the domain
if (recipientLower.endsWith(`@${domainName}`)) {
uniqueMailboxes.add(recipientLower);
}
});
});
return NextResponse.json(Array.from(uniqueMailboxes).sort());
} }

View File

@ -1,30 +0,0 @@
import { NextRequest, NextResponse } from 'next/server';
import { db } from '@/app/db/drizzle';
import { domains, emails } from '@/app/db/schema';
import { authenticate } from '@/app/lib/utils';
import { eq } from 'drizzle-orm';
/**
 * POST: re-send every unprocessed e-mail of one domain bucket by calling
 * `POST /api/email` once per e-mail with the caller's Authorization header.
 *
 * Expects a JSON body `{ bucket }`.
 *
 * Responses: 401 when unauthenticated, 400 when `bucket` is missing, 404 when
 * no domain is registered for the bucket, otherwise
 * `{ message: 'Resent <n> emails', failed: <m> }` where `n` counts the inner
 * requests that succeeded and `m` those that did not.
 */
export async function POST(req: NextRequest) {
  if (!authenticate(req)) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });

  const { bucket } = await req.json();
  if (!bucket) return NextResponse.json({ error: 'Missing bucket' }, { status: 400 });

  const [domain] = await db.select().from(domains).where(eq(domains.bucket, bucket));
  if (!domain) return NextResponse.json({ error: 'Domain not found' }, { status: 404 });

  // Restrict the candidates to this domain. The previous query selected the
  // unprocessed e-mails of ALL domains and tried to resend them from this bucket.
  const domainEmails = await db
    .select({ s3Key: emails.s3Key, processed: emails.processed })
    .from(emails)
    .where(eq(emails.domainId, domain.id));
  // Strict comparison keeps the old `processed = false` semantics (NULL rows stay excluded).
  const unprocessed = domainEmails.filter(em => em.processed === false);

  // The Origin header is absent on non-browser requests; fall back to the request URL.
  const origin = req.headers.get('origin') ?? new URL(req.url).origin;
  const authorization = req.headers.get('Authorization') ?? '';

  let count = 0;
  let failed = 0;
  for (const em of unprocessed) {
    try {
      // Call POST /api/email internally for resend (updates DB/S3)
      const res = await fetch(`${origin}/api/email`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', Authorization: authorization },
        body: JSON.stringify({ bucket, key: em.s3Key }),
      });
      // Only count e-mails whose resend actually succeeded.
      if (res.ok) {
        count++;
      } else {
        failed++;
      }
    } catch (error) {
      console.error(`Error resending ${em.s3Key}:`, error);
      failed++;
    }
  }

  return NextResponse.json({ message: `Resent ${count} emails`, failed });
}

View File

@ -19,4 +19,9 @@ export const emails = pgTable('emails', {
raw: text('raw'), raw: text('raw'),
processed: boolean('processed').default(false), processed: boolean('processed').default(false),
date: timestamp('date'), date: timestamp('date'),
// New processing metadata columns
processedAt: timestamp('processed_at'),
processedBy: text('processed_by'),
queuedTo: text('queued_to'),
status: text('status'),
}); });

View File

@ -8,8 +8,19 @@ export default function EmailDetail() {
const searchParams = useSearchParams(); const searchParams = useSearchParams();
const bucket = searchParams.get('bucket'); const bucket = searchParams.get('bucket');
const key = searchParams.get('key'); const key = searchParams.get('key');
const mailbox = searchParams.get('mailbox'); // Für Breadcrumb const mailbox = searchParams.get('mailbox');
const [email, setEmail] = useState({ subject: '', from: '', to: '', html: '', raw: '', processed: '' }); const [email, setEmail] = useState({
subject: '',
from: '',
to: '',
html: '',
raw: '',
processed: '',
processedAt: null as string | null,
processedBy: null as string | null,
queuedTo: null as string | null,
status: null as string | null,
});
const [viewMode, setViewMode] = useState('html'); const [viewMode, setViewMode] = useState('html');
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
@ -42,6 +53,20 @@ export default function EmailDetail() {
if (loading) return <div className="min-h-screen flex items-center justify-center bg-gray-100">Loading...</div>; if (loading) return <div className="min-h-screen flex items-center justify-center bg-gray-100">Loading...</div>;
if (error) return <div className="min-h-screen flex items-center justify-center bg-gray-100 text-red-500">{error}</div>; if (error) return <div className="min-h-screen flex items-center justify-center bg-gray-100 text-red-500">{error}</div>;
const formatDate = (dateStr: string | null) => {
if (!dateStr) return 'N/A';
const date = new Date(dateStr);
return date.toLocaleString('en-US', {
year: 'numeric',
month: '2-digit',
day: '2-digit',
hour: '2-digit',
minute: '2-digit',
second: '2-digit',
hour12: false
});
};
return ( return (
<div className="min-h-screen bg-gradient-to-b from-blue-50 to-gray-100 p-8"> <div className="min-h-screen bg-gradient-to-b from-blue-50 to-gray-100 p-8">
<nav className="max-w-4xl mx-auto mb-6 bg-white p-4 rounded-lg shadow-sm"> <nav className="max-w-4xl mx-auto mb-6 bg-white p-4 rounded-lg shadow-sm">
@ -59,10 +84,38 @@ export default function EmailDetail() {
</nav> </nav>
<div className="max-w-4xl mx-auto bg-white rounded-lg shadow-md p-8"> <div className="max-w-4xl mx-auto bg-white rounded-lg shadow-md p-8">
<h1 className="text-4xl font-bold mb-6 text-gray-800">{email.subject}</h1> <h1 className="text-4xl font-bold mb-6 text-gray-800">{email.subject}</h1>
<p className="text-gray-700 mb-2 text-lg"><strong>From:</strong> {email.from}</p>
<p className="text-gray-700 mb-2 text-lg"><strong>To:</strong> {email.to}</p> <div className="grid grid-cols-2 gap-4 mb-6 bg-gray-50 p-6 rounded-lg">
<p className="text-gray-700 mb-2 text-lg"><strong>S3 Key:</strong> {key}</p> <div>
<p className="text-gray-700 mb-6 text-lg"><strong>Processed:</strong> {email.processed}</p> <p className="text-gray-700 mb-2"><strong>From:</strong> {email.from}</p>
<p className="text-gray-700 mb-2"><strong>To:</strong> {email.to}</p>
<p className="text-gray-700 mb-2"><strong>S3 Key:</strong> <span className="text-sm break-all">{key}</span></p>
</div>
<div>
<p className="text-gray-700 mb-2">
<strong>Processed:</strong>
<span className={`ml-2 px-2 py-1 rounded-full text-xs font-medium ${email.processed === 'true' ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800'}`}>
{email.processed === 'true' ? 'Yes' : 'No'}
</span>
</p>
<p className="text-gray-700 mb-2"><strong>Processed At:</strong> {formatDate(email.processedAt)}</p>
<p className="text-gray-700 mb-2"><strong>Processed By:</strong> {email.processedBy || 'N/A'}</p>
<p className="text-gray-700 mb-2"><strong>Queued To:</strong> {email.queuedTo || 'N/A'}</p>
<p className="text-gray-700 mb-2">
<strong>Status:</strong>
{email.status ? (
<span className={`ml-2 px-2 py-1 rounded-full text-xs font-medium ${
email.status === 'delivered' ? 'bg-green-100 text-green-800' :
email.status === 'failed' ? 'bg-red-100 text-red-800' :
'bg-gray-100 text-gray-800'
}`}>
{email.status}
</span>
) : 'N/A'}
</p>
</div>
</div>
<div className="flex mb-6 space-x-2"> <div className="flex mb-6 space-x-2">
<button <button
onClick={() => setViewMode('html')} onClick={() => setViewMode('html')}

View File

@ -1,161 +1,37 @@
'use client'; import { NextRequest, NextResponse } from 'next/server';
import { db } from '@/app/db/drizzle';
import { domains, emails } from '@/app/db/schema';
import { authenticate } from '@/app/lib/utils';
import { eq } from 'drizzle-orm';
import { useState, useEffect } from 'react'; export async function GET(req: NextRequest) {
import { useSearchParams } from 'next/navigation'; if (!authenticate(req)) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
import Link from 'next/link';
interface Email { const { searchParams } = new URL(req.url);
key: string;
subject: string;
date: string;
processed: string;
}
export default function Emails() {
const searchParams = useSearchParams();
const bucket = searchParams.get('bucket'); const bucket = searchParams.get('bucket');
const mailbox = searchParams.get('mailbox'); if (!bucket) return NextResponse.json({ error: 'Missing bucket' }, { status: 400 });
const [emails, setEmails] = useState<Email[]>([]);
const [message, setMessage] = useState('');
const [error, setError] = useState<string | null>(null);
const [loading, setLoading] = useState(true);
useEffect(() => { const [domain] = await db.select().from(domains).where(eq(domains.bucket, bucket));
if (!bucket || !mailbox) { if (!domain) return NextResponse.json({ error: 'Domain not found' }, { status: 404 });
setError('Missing parameters');
setLoading(false); // Hole alle E-Mail-Adressen aus den "to" Feldern für diese Domain
return; const mailboxData = await db.select({ to: emails.to }).from(emails).where(eq(emails.domainId, domain.id));
// Extrahiere die Domain aus dem Bucket-Namen (z.B. "example-com-emails" -> "example.com")
const domainName = bucket.replace('-emails', '').replace(/-/g, '.');
const uniqueMailboxes = new Set<string>();
// Filtere nur E-Mail-Adressen, die zur aktuellen Domain gehören
mailboxData.forEach(em => {
em.to?.forEach(recipient => {
const recipientLower = recipient.toLowerCase();
// Prüfe, ob die E-Mail-Adresse zur Domain gehört
if (recipientLower.endsWith(`@${domainName}`)) {
uniqueMailboxes.add(recipientLower);
} }
const auth = localStorage.getItem('auth'); });
if (!auth) { });
setError('Not authenticated');
setLoading(false); return NextResponse.json(Array.from(uniqueMailboxes).sort());
return;
}
fetch(`/api/emails?bucket=${bucket}&mailbox=${encodeURIComponent(mailbox)}`, {
headers: { Authorization: `Basic ${auth}` }
})
.then(res => {
if (!res.ok) throw new Error('Failed to fetch emails');
return res.json();
})
.then(data => {
// Sortiere nach date descending
const sorted = data.sort((a: Email, b: Email) => new Date(b.date).getTime() - new Date(a.date).getTime());
setEmails(sorted);
})
.catch(err => setError(err.message))
.finally(() => setLoading(false));
}, [bucket, mailbox]);
if (loading) return <div className="min-h-screen flex items-center justify-center bg-gray-100">Loading...</div>;
if (error) return <div className="min-h-screen flex items-center justify-center bg-gray-100 text-red-500">{error}</div>;
const formatDate = (dateStr: string) => {
const date = new Date(dateStr);
return date.toLocaleString('en-US', { year: 'numeric', month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit', hour12: false });
};
const handleResendAll = async () => {
const auth = localStorage.getItem('auth');
if (!auth) return setMessage('Not authenticated');
const response = await fetch('/api/resend-domain', {
method: 'POST',
headers: { Authorization: `Basic ${auth}`, 'Content-Type': 'application/json' },
body: JSON.stringify({ bucket }),
});
const res = await response.json();
setMessage(res.message || res.error);
};
const handleUpdateProcessed = async (key: string, newValue: boolean) => {
const auth = localStorage.getItem('auth');
if (!auth) return;
await fetch('/api/email', {
method: 'PUT',
headers: { Authorization: `Basic ${auth}`, 'Content-Type': 'application/json' },
body: JSON.stringify({ bucket, key, processed: newValue ? 'true' : 'false' }),
});
setEmails(emails.map(em => em.key === key ? { ...em, processed: newValue ? 'true' : 'false' } : em));
};
const handleResend = async (key: string) => {
const auth = localStorage.getItem('auth');
if (!auth) return alert('Not authenticated');
const response = await fetch('/api/email', {
method: 'POST',
headers: { Authorization: `Basic ${auth}`, 'Content-Type': 'application/json' },
body: JSON.stringify({ bucket, key }),
});
const res = await response.json();
alert(res.message || res.error);
};
return (
<div className="min-h-screen bg-gradient-to-b from-blue-50 to-gray-100 p-8">
<nav className="max-w-4xl mx-auto mb-6 bg-white p-4 rounded-lg shadow-sm">
<ol className="flex flex-wrap space-x-2 text-sm text-gray-500">
<li><Link href="/" className="hover:text-blue-600">Home</Link></li>
<li className="mx-1">/</li>
<li><Link href="/domains" className="hover:text-blue-600">Domains</Link></li>
<li className="mx-1">/</li>
<li><Link href={`/mailboxes?bucket=${bucket}`} className="hover:text-blue-600">Mailboxes</Link></li>
<li className="mx-1">/</li>
<li className="font-semibold text-gray-700">Emails</li>
</ol>
</nav>
<h1 className="text-4xl font-bold mb-8 text-center text-gray-800">Emails for {mailbox} in {bucket}</h1>
<div className="flex justify-center mb-6">
<button
onClick={handleResendAll}
className="bg-green-500 text-white px-8 py-3 rounded-lg hover:bg-green-600 transition shadow-md"
>
Resend all unprocessed
</button>
</div>
{message && <p className="text-center mb-6 text-blue-600 font-medium">{message}</p>}
<div className="overflow-x-auto max-w-4xl mx-auto bg-white rounded-lg shadow-md">
<table className="min-w-full divide-y divide-gray-200">
<thead className="bg-blue-50">
<tr>
<th className="px-6 py-3 text-left text-xs font-medium text-gray-700 uppercase tracking-wider">Subject</th>
<th className="px-6 py-3 text-left text-xs font-medium text-gray-700 uppercase tracking-wider">Date</th>
<th className="px-6 py-3 text-left text-xs font-medium text-gray-700 uppercase tracking-wider">S3 Key</th>
<th className="px-6 py-3 text-left text-xs font-medium text-gray-700 uppercase tracking-wider">Processed</th>
<th className="px-6 py-3 text-left text-xs font-medium text-gray-700 uppercase tracking-wider">Actions</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
{emails.map((e: Email) => (
<tr key={e.key} className="hover:bg-blue-50 transition">
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">{e.subject}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">{formatDate(e.date)}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500 truncate max-w-xs">{e.key}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<input
type="checkbox"
checked={e.processed === 'true'}
onChange={() => handleUpdateProcessed(e.key, e.processed !== 'true')}
className="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded"
/>
</td>
<td className="px-6 py-4 whitespace-nowrap text-sm font-medium">
<Link href={`/email?bucket=${bucket}&key=${e.key}&mailbox=${encodeURIComponent(mailbox || '')}`} className="text-blue-600 hover:text-blue-900 mr-4">
View
</Link>
<button onClick={() => handleResend(e.key)} className="text-green-600 hover:text-green-900">
Resend
</button>
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
);
} }

View File

@ -5,13 +5,13 @@ import { simpleParser } from 'mailparser';
import { ListBucketsCommand, ListObjectsV2Command, GetObjectCommand, HeadObjectCommand, S3Client } from '@aws-sdk/client-s3'; import { ListBucketsCommand, ListObjectsV2Command, GetObjectCommand, HeadObjectCommand, S3Client } from '@aws-sdk/client-s3';
import { eq, sql, inArray } from 'drizzle-orm'; import { eq, sql, inArray } from 'drizzle-orm';
import { Readable } from 'stream'; import { Readable } from 'stream';
import pLimit from 'p-limit'; // Für parallele Verarbeitung mit Limit import pLimit from 'p-limit';
import pRetry from 'p-retry'; import pRetry from 'p-retry';
// Konfigurierbare Konstanten // Konfigurierbare Konstanten
const CONCURRENT_S3_OPERATIONS = 10; // Parallele S3 Operationen const CONCURRENT_S3_OPERATIONS = 10;
const BATCH_INSERT_SIZE = 100; // Batch-Größe für DB Inserts const BATCH_INSERT_SIZE = 100;
const CONCURRENT_EMAIL_PARSING = 5; // Parallele E-Mail Parser const CONCURRENT_EMAIL_PARSING = 5;
export async function syncAllDomains() { export async function syncAllDomains() {
console.log('Starting optimized syncAllDomains...'); console.log('Starting optimized syncAllDomains...');
@ -22,8 +22,7 @@ export async function syncAllDomains() {
const domainBuckets = Buckets?.filter(b => b.Name?.endsWith('-emails')) || []; const domainBuckets = Buckets?.filter(b => b.Name?.endsWith('-emails')) || [];
console.log(`Found ${domainBuckets.length} domain buckets`); console.log(`Found ${domainBuckets.length} domain buckets`);
// Parallele Verarbeitung der Buckets const bucketLimit = pLimit(3);
const bucketLimit = pLimit(3); // Max 3 Buckets parallel
await Promise.all( await Promise.all(
domainBuckets.map(bucketObj => domainBuckets.map(bucketObj =>
@ -32,7 +31,6 @@ export async function syncAllDomains() {
const domainName = bucket.replace('-emails', '').replace(/-/g, '.'); const domainName = bucket.replace('-emails', '').replace(/-/g, '.');
console.log(`Processing bucket: ${bucket}`); console.log(`Processing bucket: ${bucket}`);
// Upsert Domain
const [domain] = await db const [domain] = await db
.insert(domains) .insert(domains)
.values({ bucket, domain: domainName }) .values({ bucket, domain: domainName })
@ -63,7 +61,7 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
const response = await pRetry( const response = await pRetry(
() => s3.send(new ListObjectsV2Command({ () => s3.send(new ListObjectsV2Command({
Bucket: bucket, Bucket: bucket,
MaxKeys: 1000, // Maximum per Request MaxKeys: 1000,
ContinuationToken: continuationToken ContinuationToken: continuationToken
})), })),
{ retries: 3 } { retries: 3 }
@ -94,37 +92,45 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
// 3. Bestimme was zu tun ist // 3. Bestimme was zu tun ist
const toInsert: string[] = []; const toInsert: string[] = [];
const toCheckProcessed: string[] = []; const toUpdate: string[] = [];
for (const key of allS3Keys) { for (const key of allS3Keys) {
if (!existingKeysMap.has(key)) { if (!existingKeysMap.has(key)) {
toInsert.push(key); toInsert.push(key);
} else { } else {
toCheckProcessed.push(key); toUpdate.push(key);
} }
} }
console.log(`To insert: ${toInsert.length}, To check: ${toCheckProcessed.length}`); console.log(`To insert: ${toInsert.length}, To update: ${toUpdate.length}`);
// 4. Parallele Verarbeitung der Updates (Processed Status) // 4. Parallele Verarbeitung der Updates (Metadaten)
if (toCheckProcessed.length > 0) { if (toUpdate.length > 0) {
const updateLimit = pLimit(CONCURRENT_S3_OPERATIONS); const updateLimit = pLimit(CONCURRENT_S3_OPERATIONS);
const updatePromises = toCheckProcessed.map(key => const updatePromises = toUpdate.map(key =>
updateLimit(async () => { updateLimit(async () => {
try { try {
const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key })); const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key }));
const processed = head.Metadata?.[process.env.PROCESSED_META_KEY!] === process.env.PROCESSED_META_VALUE!; const metadata = head.Metadata || {};
const currentProcessed = existingKeysMap.get(key);
const processed = metadata[process.env.PROCESSED_META_KEY!] === process.env.PROCESSED_META_VALUE!;
const processedAt = metadata['processed_at'] ? new Date(metadata['processed_at']) : null;
const processedBy = metadata['processed_by'] || null;
const queuedTo = metadata['queued_to'] || null;
const status = metadata['status'] || null;
if (currentProcessed !== processed) {
await db await db
.update(emails) .update(emails)
.set({ processed }) .set({
processed,
processedAt,
processedBy,
queuedTo,
status
})
.where(eq(emails.s3Key, key)); .where(eq(emails.s3Key, key));
console.log(`Updated processed status for ${key}`);
}
} catch (error) { } catch (error) {
console.error(`Error checking ${key}:`, error); console.error(`Error updating ${key}:`, error);
} }
}) })
); );
@ -136,17 +142,14 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
if (toInsert.length > 0) { if (toInsert.length > 0) {
console.log(`Processing ${toInsert.length} new emails...`); console.log(`Processing ${toInsert.length} new emails...`);
// Verarbeite in Batches
for (let i = 0; i < toInsert.length; i += BATCH_INSERT_SIZE) { for (let i = 0; i < toInsert.length; i += BATCH_INSERT_SIZE) {
const batch = toInsert.slice(i, i + BATCH_INSERT_SIZE); const batch = toInsert.slice(i, i + BATCH_INSERT_SIZE);
console.log(`Processing batch ${Math.floor(i/BATCH_INSERT_SIZE) + 1}/${Math.ceil(toInsert.length/BATCH_INSERT_SIZE)}`); console.log(`Processing batch ${Math.floor(i/BATCH_INSERT_SIZE) + 1}/${Math.ceil(toInsert.length/BATCH_INSERT_SIZE)}`);
// Paralleles Fetching und Parsing
const parseLimit = pLimit(CONCURRENT_EMAIL_PARSING); const parseLimit = pLimit(CONCURRENT_EMAIL_PARSING);
const emailDataPromises = batch.map(key => const emailDataPromises = batch.map(key =>
parseLimit(async () => { parseLimit(async () => {
try { try {
// Hole Objekt und Metadata parallel
const [getObjResponse, headResponse] = await Promise.all([ const [getObjResponse, headResponse] = await Promise.all([
pRetry(() => s3.send(new GetObjectCommand({ Bucket: bucket, Key: key })), { retries: 2 }), pRetry(() => s3.send(new GetObjectCommand({ Bucket: bucket, Key: key })), { retries: 2 }),
pRetry(() => s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key })), { retries: 2 }) pRetry(() => s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key })), { retries: 2 })
@ -154,11 +157,12 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
const raw = await getBody(getObjResponse.Body as Readable); const raw = await getBody(getObjResponse.Body as Readable);
const parsed = await simpleParser(raw, { const parsed = await simpleParser(raw, {
skipHtmlToText: true, // Schneller, wenn Text nicht benötigt skipHtmlToText: true,
skipTextContent: false, skipTextContent: false,
skipImageLinks: true skipImageLinks: true
}); });
const metadata = headResponse.Metadata || {};
const to = extractAddresses(parsed.to); const to = extractAddresses(parsed.to);
const cc = extractAddresses(parsed.cc); const cc = extractAddresses(parsed.cc);
const bcc = extractAddresses(parsed.bcc); const bcc = extractAddresses(parsed.bcc);
@ -173,8 +177,13 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
subject: parsed.subject, subject: parsed.subject,
html: parsed.html || parsed.textAsHtml, html: parsed.html || parsed.textAsHtml,
raw: raw.toString('utf-8'), raw: raw.toString('utf-8'),
processed: headResponse.Metadata?.[process.env.PROCESSED_META_KEY!] === process.env.PROCESSED_META_VALUE!, processed: metadata[process.env.PROCESSED_META_KEY!] === process.env.PROCESSED_META_VALUE!,
date: parsed.date || headResponse.LastModified, date: parsed.date || headResponse.LastModified,
// New processing metadata read from the S3 object's user metadata
processedAt: metadata['processed_at'] ? new Date(metadata['processed_at']) : null,
processedBy: metadata['processed_by'] || null,
queuedTo: metadata['queued_to'] || null,
status: metadata['status'] || null,
}; };
} catch (error) { } catch (error) {
console.error(`Error processing ${key}:`, error); console.error(`Error processing ${key}:`, error);
@ -185,7 +194,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
const emailData = (await Promise.all(emailDataPromises)).filter(Boolean); const emailData = (await Promise.all(emailDataPromises)).filter(Boolean);
// Batch Insert
if (emailData.length > 0) { if (emailData.length > 0) {
await db.insert(emails).values(emailData); await db.insert(emails).values(emailData);
console.log(`Inserted ${emailData.length} emails`); console.log(`Inserted ${emailData.length} emails`);
@ -197,7 +205,6 @@ async function syncEmailsForDomainOptimized(domainId: number, bucket: string, s3
console.log(`Sync for ${bucket} completed in ${duration}s`); console.log(`Sync for ${bucket} completed in ${duration}s`);
} }
// Helper Funktion für Address-Extraktion
function extractAddresses(addressObj: any): string[] { function extractAddresses(addressObj: any): string[] {
if (!addressObj) return []; if (!addressObj) return [];
if (Array.isArray(addressObj)) { if (Array.isArray(addressObj)) {
@ -205,85 +212,3 @@ function extractAddresses(addressObj: any): string[] {
} }
return addressObj.value?.map((v: any) => v.address?.toLowerCase() || '').filter(Boolean) || []; return addressObj.value?.map((v: any) => v.address?.toLowerCase() || '').filter(Boolean) || [];
} }
/**
 * Page-by-page sync variant intended for very large buckets.
 *
 * Lists the bucket 100 keys at a time, skips keys that already have a row in
 * `emails` for this domain, and hands the remaining objects to
 * `processBatchAsync` in chunks of `QUEUE_SIZE`.
 *
 * Every new key of a page is processed (previously only the first
 * `QUEUE_SIZE` were, the rest were silently dropped), and each chunk is
 * awaited so the returned promise resolves only after all work is done.
 * A failing chunk is logged and does not stop the remaining chunks.
 *
 * @param domainId id of the domain row the e-mails belong to
 * @param bucket   name of the S3 bucket to list
 * @param s3       S3 client used for all requests
 */
export async function syncEmailsForDomainStreaming(domainId: number, bucket: string, s3: S3Client) {
  console.log(`Starting streaming sync for bucket: ${bucket}`);

  // Keys that are already stored for this domain and therefore need no insert.
  const existingKeys = new Set(
    (await db
      .select({ s3Key: emails.s3Key })
      .from(emails)
      .where(eq(emails.domainId, domainId))
    ).map(e => e.s3Key)
  );

  const QUEUE_SIZE = 50;

  let continuationToken: string | undefined;
  do {
    const response = await s3.send(new ListObjectsV2Command({
      Bucket: bucket,
      MaxKeys: 100,
      ContinuationToken: continuationToken
    }));

    const newKeys = response.Contents?.filter(obj =>
      obj.Key && !existingKeys.has(obj.Key)
    ) || [];

    // Walk the whole page in QUEUE_SIZE chunks so no new key is skipped.
    for (let start = 0; start < newKeys.length; start += QUEUE_SIZE) {
      const batch = newKeys.slice(start, start + QUEUE_SIZE);
      try {
        await processBatchAsync(batch, bucket, domainId, s3);
      } catch (error) {
        console.error(`Error processing batch for ${bucket}:`, error);
      }
    }

    continuationToken = response.NextContinuationToken;
  } while (continuationToken);
}
/**
 * Download, parse and store one batch of listed S3 objects.
 *
 * All objects of the batch are handled concurrently. An object that cannot be
 * fetched or parsed is logged and left out; the remaining rows are written to
 * `emails` with a single insert. Nothing is inserted when no object succeeded.
 *
 * @param batch    listed S3 objects; `Key` and `LastModified` are read from each
 * @param bucket   bucket the objects live in
 * @param domainId id of the domain row the e-mails belong to
 * @param s3       S3 client used for all requests
 */
async function processBatchAsync(batch: any[], bucket: string, domainId: number, s3: S3Client) {
  // Builds the insertable row for one object, or null when anything goes wrong.
  const buildRow = async (obj: any) => {
    try {
      // Body and metadata are requested side by side.
      const bodyRequest = s3.send(new GetObjectCommand({ Bucket: bucket, Key: obj.Key }));
      const headRequest = s3.send(new HeadObjectCommand({ Bucket: bucket, Key: obj.Key }));
      const [getObjResponse, headResponse] = await Promise.all([bodyRequest, headRequest]);

      const raw = await getBody(getObjResponse.Body as Readable);
      const parsed = await simpleParser(raw);

      // The object counts as processed when its marker metadata matches the configured value.
      const isProcessed =
        headResponse.Metadata?.[process.env.PROCESSED_META_KEY!] === process.env.PROCESSED_META_VALUE!;

      return {
        domainId,
        s3Key: obj.Key,
        from: parsed.from?.value[0]?.address,
        to: extractAddresses(parsed.to),
        cc: extractAddresses(parsed.cc),
        bcc: extractAddresses(parsed.bcc),
        subject: parsed.subject,
        html: parsed.html || parsed.textAsHtml,
        raw: raw.toString('utf-8'),
        processed: isProcessed,
        // Fall back to the object's modification time when the mail has no Date header.
        date: parsed.date || obj.LastModified,
      };
    } catch (error) {
      console.error(`Error processing ${obj.Key}:`, error);
      return null;
    }
  };

  const rows = await Promise.all(batch.map(buildRow));
  const insertable = rows.filter(Boolean);
  if (insertable.length === 0) {
    return;
  }
  await db.insert(emails).values(insertable);
}