From d5b79867612046b6b74474e9d9af9bf67dff92e9 Mon Sep 17 00:00:00 2001
From: Andreas Knuth <andreas.knuth@gmail.com>
Date: Wed, 1 Oct 2025 17:19:53 -0500
Subject: [PATCH] annavillesda

---
 caddy/Caddyfile                          |   8 ++
 ses-lambda-new-python/lambda_function.py | 146 +++++++++++++++++++----
 2 files changed, 134 insertions(+), 20 deletions(-)

diff --git a/caddy/Caddyfile b/caddy/Caddyfile
index 8501b8b..532c5bd 100644
--- a/caddy/Caddyfile
+++ b/caddy/Caddyfile
@@ -240,4 +240,12 @@ email-srvr.com {
         output stderr
         format console
     }
+}
+annavillesda.org {
+    # Frontend (static files)
+    root * /home/aknuth/git/annaville-sda-site/dist
+    file_server
+    
+    # Forward API requests to the backend
+    reverse_proxy /api/* localhost:3000
 }
\ No newline at end of file
diff --git a/ses-lambda-new-python/lambda_function.py b/ses-lambda-new-python/lambda_function.py
index 2dcdae3..e2426ee 100644
--- a/ses-lambda-new-python/lambda_function.py
+++ b/ses-lambda-new-python/lambda_function.py
@@ -11,10 +11,10 @@ from email.utils import getaddresses
 s3 = boto3.client('s3')
 
 # Environment variables (set these in the Lambda config)
-SMTP_HOST = os.environ.get('MAILCOW_SMTP_HOST', 'mail.email-srvr.com')
-SMTP_PORT = int(os.environ.get('MAILCOW_SMTP_PORT', '2525'))  # default to your mapped port
-SMTP_USER = os.environ.get('SMTP_USER') or os.environ.get('MAILCOW_SMTP_USER')
-SMTP_PASS = os.environ.get('SMTP_PASS') or os.environ.get('MAILCOW_SMTP_PASS')
+SMTP_HOST = os.environ.get('SMTP_HOST', 'mail.email-srvr.com')
+SMTP_PORT = int(os.environ.get('SMTP_PORT', '2525'))  # default to your mapped port
+SMTP_USER = os.environ.get('SMTP_USER')  # None when unset
+SMTP_PASS = os.environ.get('SMTP_PASS')  # None when unset
 
 # Metadata key/value to mark processed objects (only set when at least one recipient delivered)
 PROCESSED_META_KEY = os.environ.get('PROCESSED_META_KEY', 'processed')
@@ -32,9 +32,16 @@ def bucket_to_domain(bucket: str) -> str:
 
 def parse_raw_message(raw_bytes: bytes):
     try:
-        parsed = BytesParser(policy=default).parsebytes(raw_bytes)
-    except Exception:
-        parsed = None
+        # Parse with the SMTP policy first; on any error the handler below retries with `default`
+        from email.policy import SMTP
+        parsed = BytesParser(policy=SMTP).parsebytes(raw_bytes)
+    except Exception as e:
+        print(f"Error parsing with SMTP policy: {e}, trying with default policy")
+        try:
+            parsed = BytesParser(policy=default).parsebytes(raw_bytes)
+        except Exception as e2:
+            print(f"Error parsing with default policy: {e2}")
+            parsed = None
     return parsed
 
 def mark_object_processed(bucket: str, key: str):
@@ -99,6 +106,16 @@ def is_temporary_smtp_error(error_code):
             return 400 <= code < 500
     return False
 
+def is_spam_rejection(error_code):
+    """Return True when error_code is a (554, message) SMTP reply mentioning spam (permanent, do not retry)."""
+    if isinstance(error_code, tuple) and len(error_code) >= 2:
+        code, message = error_code[0], error_code[1]
+        # message may be bytes or str (or None); normalise so the substring test cannot raise TypeError
+        if isinstance(message, bytes):
+            message = message.decode('utf-8', errors='replace')
+        return code == 554 and 'spam' in str(message).lower()
+    return False
+
 def send_email_with_retry(smtp_host, smtp_port, smtp_user, smtp_pass, 
                           frm_addr, recipients, raw_message, local_helo,
                           max_retries=MAX_RETRIES):
@@ -190,9 +207,27 @@ def send_email_with_retry(smtp_host, smtp_port, smtp_user, smtp_pass,
                         
                 except Exception as e:
                     print(f"SMTP sendmail error on attempt {attempt + 1}: {e}")
+                    
+                    # Check if this is a spam rejection (permanent error that shouldn't be retried)
+                    if hasattr(e, 'smtp_code') and hasattr(e, 'smtp_error'):
+                        if is_spam_rejection((e.smtp_code, e.smtp_error)):
+                            print(f"Email rejected as spam (permanent error), not retrying")
+                            refused = {r: (e.smtp_code, e.smtp_error) for r in recipients}
+                            delivered = []
+                            break
+                    
+                    # For other errors, check if it's worth retrying
                     if attempt < max_retries:
-                        last_error = str(e)
-                        continue
+                        # Only retry if it might be temporary
+                        error_str = str(e)
+                        if '554' in error_str and 'spam' in error_str.lower():
+                            print(f"Email rejected as spam, not retrying")
+                            refused = {r: ('spam', str(e)) for r in recipients}
+                            delivered = []
+                            break
+                        else:
+                            last_error = str(e)
+                            continue
                     else:
                         traceback.print_exc()
                         refused = {r: ('error', str(e)) for r in recipients}
@@ -225,8 +260,11 @@ def lambda_handler(event, context):
     recipients = []
     bucket = None
     key = None
+    is_ses_event = False
 
     if 'ses' in rec:
+        # SES event - trusted, has the correct recipients
+        is_ses_event = True
         ses = rec['ses']
         msg_id = ses['mail']['messageId']
         recipients = ses['receipt'].get('recipients', [])
@@ -243,6 +281,7 @@ def lambda_handler(event, context):
         else:
             raise Exception("SES event but no recipients found")
     elif 's3' in rec:
+        # S3 event - recipients must be extracted from the headers
         s3info = rec['s3']
         bucket = s3info['bucket']['name']
         key = s3info['object']['key']
@@ -282,19 +321,86 @@ def lambda_handler(event, context):
     print(f"From: {frm_addr}, Subject: {subj}")
 
     # If recipients were not provided (S3 path), extract from headers
-    if not recipients:
+    if not recipients and not is_ses_event:
         if parsed:
-            to_addrs = [addr for _n, addr in getaddresses(parsed.get_all('to', []) or [])]
-            cc_addrs = [addr for _n, addr in getaddresses(parsed.get_all('cc', []) or [])]
-            bcc_addrs = [addr for _n, addr in getaddresses(parsed.get_all('bcc', []) or [])]
-            recipients = to_addrs + cc_addrs + bcc_addrs
-            print("Recipients from headers:", recipients)
-            # filter recipients to bucket domain (safety)
-            expected_domain = bucket_to_domain(bucket)
-            recipients = [r for r in recipients if r.lower().split('@')[-1] == expected_domain]
-            print(f"Recipients after domain filter ({expected_domain}): {recipients}")
+            expected_domain = bucket_to_domain(bucket).lower()
+            
+            # Debug: Print raw headers to understand what we're getting
+            print(f"=== DEBUG: Header Analysis ===")
+            print(f"Expected domain: {expected_domain}")
+            
+            # The email parser is case-insensitive for headers, so we only need to check once
+            # Get headers using standard case (parser handles case-insensitivity)
+            to_headers = parsed.get_all('to', []) or []
+            cc_headers = parsed.get_all('cc', []) or []
+            bcc_headers = parsed.get_all('bcc', []) or []
+            
+            if to_headers:
+                print(f"Found 'To' header: {to_headers}")
+            if cc_headers:
+                print(f"Found 'Cc' header: {cc_headers}")
+            if bcc_headers:
+                print(f"Found 'Bcc' header: {bcc_headers}")
+            
+            # Parse addresses from headers
+            to_addrs = [addr for _n, addr in getaddresses(to_headers) if addr]
+            cc_addrs = [addr for _n, addr in getaddresses(cc_headers) if addr]
+            bcc_addrs = [addr for _n, addr in getaddresses(bcc_headers) if addr]
+            
+            all_recipients = to_addrs + cc_addrs + bcc_addrs
+            
+            print(f"Parsed recipients - To: {to_addrs}, Cc: {cc_addrs}, Bcc: {bcc_addrs}")
+            
+            # Filter recipients to bucket domain with case-insensitive comparison
+            # and deduplicate using a set (preserving case)
+            recipients_set = set()
+            recipients = []
+            for addr in all_recipients:
+                # Extract domain part (everything after @)
+                if '@' in addr:
+                    addr_lower = addr.lower()
+                    addr_domain = addr_lower.split('@')[-1]
+                    if addr_domain == expected_domain:
+                        # Only add if not already in set (case-insensitive deduplication)
+                        if addr_lower not in recipients_set:
+                            recipients_set.add(addr_lower)
+                            recipients.append(addr)  # Keep original case
+                            print(f"Matched recipient: {addr} (domain: {addr_domain})")
+                        else:
+                            print(f"Skipped duplicate: {addr}")
+                    else:
+                        print(f"Skipped recipient: {addr} (domain: {addr_domain} != {expected_domain})")
+            
+            print(f"Final recipients after domain filter and deduplication: {recipients}")
+            
+            # If no recipients found, try additional headers
+            if not recipients:
+                print("WARNING: No recipients found in standard headers, checking additional headers...")
+                
+                # Fallback headers to consult. parsed.get() matches header names case-insensitively,
+                # so each name is listed once; lowercase repeats would append every hit twice.
+                fallback_headers = ['X-Original-To', 'Delivered-To', 'Envelope-To']
+                
+                for header_name in fallback_headers:
+                    header_val = parsed.get(header_name)
+                    if header_val:
+                        print(f"Found {header_name}: {header_val}")
+                        fallback_addrs = [addr for _n, addr in getaddresses([header_val]) if addr]
+                        for addr in fallback_addrs:
+                            if '@' in addr and addr.split('@')[-1].lower() == expected_domain and addr not in recipients:
+                                recipients.append(addr)
+                                print(f"Found recipient in {header_name}: {addr}")
+                
+                if not recipients:
+                    print(f"ERROR: Could not find any recipients for domain {expected_domain}")
+                    print(f"All addresses found: {all_recipients}")
+                    # Print all headers for debugging (loop variable must not reuse `key`, the S3 object key)
+                    print("=== All Email Headers ===")
+                    for hdr_name in parsed.keys():
+                        print(f"{hdr_name}: {parsed.get(hdr_name)}")
+                    print("=== End Headers ===")
         else:
-            print("No parsed headers and no recipients provided; nothing to do.")
+            print("ERROR: Could not parse email headers")
             recipients = []
 
     # If after all we have no recipients, skip SMTP