This commit is contained in:
Andreas Knuth 2025-10-09 17:58:27 -05:00
parent 5533fbff14
commit b9066a8f59
1 changed file with 197 additions and 28 deletions

@@ -4,6 +4,8 @@ import smtplib
import time
import traceback
import json
import random
import signal
from email.parser import BytesParser
from email.policy import default
from email.utils import getaddresses
@@ -12,9 +14,9 @@ s3 = boto3.client('s3')
# Environment variables (set these in the Lambda config)
SMTP_HOST = os.environ.get('SMTP_HOST', 'mail.email-srvr.com')
SMTP_PORT = int(os.environ.get('SMTP_PORT', '2525'))
SMTP_USER = os.environ.get('SMTP_USER')
SMTP_PASS = os.environ.get('SMTP_PASS')

# Metadata key/value to mark processed objects (only set when at least one recipient delivered)
PROCESSED_META_KEY = os.environ.get('PROCESSED_META_KEY', 'processed')
@@ -22,14 +24,32 @@ PROCESSED_META_VALUE = os.environ.get('PROCESSED_META_VALUE', 'true')
# Retry configuration
MAX_RETRIES = int(os.environ.get('MAX_SMTP_RETRIES', '3'))
BASE_RETRY_DELAY = int(os.environ.get('BASE_RETRY_DELAY', '2'))

# Email size limit (25 MB - Lambda memory safety margin)
MAX_EMAIL_SIZE_MB = int(os.environ.get('MAX_EMAIL_SIZE_MB', '25'))
MAX_EMAIL_SIZE_BYTES = MAX_EMAIL_SIZE_MB * 1024 * 1024
# ============================================================================
# IMPROVEMENT 5: Timeout Protection
# ============================================================================
class TimeoutException(Exception):
    pass


def timeout_handler(signum, frame):
    raise TimeoutException("Lambda approaching timeout")
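
# Wiring timeout_handler to SIGALRM (done in lambda_handler below) turns the
# alarm into a catchable exception, so the function can return a clean 408
# instead of being killed mid-delivery by the Lambda runtime.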

def domain_to_bucket(domain: str) -> str:
    return domain.replace('.', '-') + '-emails'


def bucket_to_domain(bucket: str) -> str:
    return bucket.replace('-emails', '').replace('-', '.')
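# NOTE: these helpers are not exact inverses for hyphenated domains:
# domain_to_bucket('my-site.com') -> 'my-site-com-emails', but
# bucket_to_domain('my-site-com-emails') -> 'my.site.com'.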

def parse_raw_message(raw_bytes: bytes):
    try:
        # Use SMTP policy for better compatibility with various email formats
@@ -44,6 +64,7 @@ def parse_raw_message(raw_bytes: bytes):
        parsed = None
    return parsed
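# parse_raw_message returns a parsed email message object on success, or None
# when the bytes cannot be parsed; callers below must handle the None case.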

def mark_object_processed(bucket: str, key: str):
    try:
        head = s3.head_object(Bucket=bucket, Key=key)
@@ -53,6 +74,9 @@ def mark_object_processed(bucket: str, key: str):
            return
        new_meta = current_metadata.copy()
        new_meta[PROCESSED_META_KEY] = PROCESSED_META_VALUE
        new_meta['processed_at'] = str(int(time.time()))
        # Remove processing lock
        new_meta.pop('processing_started', None)
        # Copy object onto itself replacing metadata
        s3.copy_object(
            Bucket=bucket,
@@ -61,11 +85,12 @@ def mark_object_processed(bucket: str, key: str):
            Metadata=new_meta,
            MetadataDirective='REPLACE'
        )
        print(f"Marked {bucket}/{key} as processed at {new_meta['processed_at']}")
    except Exception as e:
        print("Failed to mark processed metadata:", e)
        traceback.print_exc()

def update_retry_metadata(bucket: str, key: str, retry_count: int, last_error: str = None):
    """Update S3 object metadata with retry information"""
    try:
@@ -89,6 +114,7 @@ def update_retry_metadata(bucket: str, key: str, retry_count: int, last_error: str = None):
    except Exception as e:
        print(f"Failed to update retry metadata: {e}")

def get_retry_count(bucket: str, key: str) -> int:
    """Get current retry count from S3 metadata"""
    try:
@@ -98,6 +124,7 @@ def get_retry_count(bucket: str, key: str) -> int:
    except Exception:
        return 0

def is_temporary_smtp_error(error_code):
    """Check if SMTP error code indicates a temporary failure (4xx)"""
    if isinstance(error_code, tuple) and len(error_code) >= 1:
@@ -106,6 +133,7 @@ def is_temporary_smtp_error(error_code):
        return 400 <= code < 500
    return False
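# Example: smtplib reports refused recipients as {addr: (code, message)}, so
# is_temporary_smtp_error((451, b'4.7.1 Greylisted')) -> True (worth retrying),
# while (550, b'5.1.1 User unknown') -> False (permanent failure).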

def is_spam_rejection(error_code):
    """Check if the error is a spam rejection (should not be retried)"""
    if isinstance(error_code, tuple) and len(error_code) >= 2:
@@ -116,10 +144,14 @@ def is_spam_rejection(error_code):
            return True
    return False

# ============================================================================
# IMPROVEMENT 3: Exponential Backoff with Jitter
# ============================================================================
def send_email_with_retry(smtp_host, smtp_port, smtp_user, smtp_pass,
                          frm_addr, recipients, raw_message, local_helo,
                          max_retries=MAX_RETRIES):
    """Send email with retry logic for temporary failures using exponential backoff"""
    delivered = []
    refused = {}
@@ -127,9 +159,13 @@ def send_email_with_retry(smtp_host, smtp_port, smtp_user, smtp_pass,
    for attempt in range(max_retries + 1):
        if attempt > 0:
            # Exponential backoff with jitter: 2s, 4s, 8s (configurable via BASE_RETRY_DELAY)
            delay = BASE_RETRY_DELAY * (2 ** (attempt - 1))
            # Add jitter to prevent thundering herd
            jitter = random.uniform(0, delay * 0.3)  # +0-30% random jitter
            total_delay = delay + jitter
            print(f"Retry attempt {attempt}/{max_retries} after {total_delay:.1f}s delay (base: {delay}s + jitter: {jitter:.1f}s)")
            time.sleep(total_delay)
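            # With the defaults (MAX_SMTP_RETRIES=3, BASE_RETRY_DELAY=2) this
            # yields roughly 2s, 4s, 8s between attempts; the random jitter
            # spreads out concurrent Lambdas retrying against the same server.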

        try:
            with smtplib.SMTP(smtp_host, smtp_port, timeout=30, local_hostname=local_helo) as smtp:
@@ -247,7 +283,34 @@ def send_email_with_retry(smtp_host, smtp_port, smtp_user, smtp_pass,
    return delivered, refused
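
# Illustrative call (hypothetical addresses):
#   delivered, refused = send_email_with_retry(
#       SMTP_HOST, SMTP_PORT, SMTP_USER, SMTP_PASS,
#       'sender@example.com', ['rcpt@example.com'], raw_bytes, 'lambda.local')
# `delivered` lists accepted recipients; `refused` maps addr -> (code, message).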

# ============================================================================
# IMPROVEMENT 4: Structured Logging
# ============================================================================
def log_processing_result(bucket: str, key: str, delivered: list, refused: dict, retry_count: int):
    """Log processing results to CloudWatch in structured format for easier analysis"""
    result = {
        'timestamp': int(time.time()),
        'bucket': bucket,
        'key': key,
        'delivered_count': len(delivered),
        'refused_count': len(refused),
        'retry_count': retry_count,
        'delivered_recipients': delivered,
        'refused_recipients': list(refused.keys()) if refused else [],
        'success': len(delivered) > 0
    }
    # Structured logging for CloudWatch Insights
    print(f"PROCESSING_RESULT: {json.dumps(result)}")
    return result
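
# The fixed PROCESSING_RESULT prefix makes these entries easy to pull out with
# CloudWatch Logs Insights, e.g. (sketch):
#   fields @timestamp, @message
#   | filter @message like /PROCESSING_RESULT/
#   | sort @timestamp desc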

# ============================================================================
# MAIN PROCESSING FUNCTION (extracted for timeout handling)
# ============================================================================
def _process_email(event, context):
    """Main email processing logic (extracted for timeout protection)"""
    print("Event:", event)
    ses = None
    try:
@@ -290,25 +353,83 @@ def lambda_handler(event, context):
    else:
        raise Exception("Unknown event type")

    # ========================================================================
    # IMPROVEMENT 1: Duplicate Prevention with Processing Lock
    # ========================================================================
    try:
        head = s3.head_object(Bucket=bucket, Key=key)
        metadata = head.get('Metadata', {}) or {}

        # Check if already processed
        if metadata.get(PROCESSED_META_KEY) == PROCESSED_META_VALUE:
            processed_at = metadata.get('processed_at', 'unknown time')
            print(f"Object {key} already processed at {processed_at}")
            return {'statusCode': 200, 'body': 'already processed'}

        # Check if currently being processed (lock mechanism)
        processing_started = metadata.get('processing_started')
        if processing_started:
            processing_age = time.time() - float(processing_started)
            if processing_age < 300:  # 5-minute lock
                print(f"Object {key} is being processed by another Lambda (started {processing_age:.0f}s ago)")
                return {'statusCode': 200, 'body': 'already being processed'}
            else:
                print(f"Stale processing lock detected ({processing_age:.0f}s old), continuing")

        # Set processing lock
        new_meta = metadata.copy()
        new_meta['processing_started'] = str(int(time.time()))
        s3.copy_object(
            Bucket=bucket,
            Key=key,
            CopySource={'Bucket': bucket, 'Key': key},
            Metadata=new_meta,
            MetadataDirective='REPLACE'
        )
        print(f"Set processing lock on {key}")

    except s3.exceptions.NoSuchKey:
        print(f"Object {key} no longer exists, skipping")
        return {'statusCode': 404, 'body': 'object not found'}
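    # NOTE: boto3's head_object usually surfaces a missing key as a generic
    # ClientError with HTTP 404 rather than NoSuchKey, so in practice this
    # branch may not fire and the generic handler below takes over.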
    except Exception as e:
        print(f"Error checking/setting processing lock: {e}")
        # Continue anyway if lock fails (better than dropping email)
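    # The HEAD-then-COPY lock is best-effort, not atomic: two Lambdas that HEAD
    # concurrently can both "acquire" it. Combined with the processed-metadata
    # check it still eliminates most duplicate deliveries.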

    # Check retry count - if too many retries, give up
    retry_count = get_retry_count(bucket, key)
    if retry_count >= MAX_RETRIES * 2:  # Safety limit
        print(f"Object {key} has been retried {retry_count} times, giving up")
        mark_object_processed(bucket, key)  # Mark as processed to prevent infinite retries
        return {'statusCode': 200, 'body': f'max retries exceeded ({retry_count})'}

    # ========================================================================
    # IMPROVEMENT 2: Memory Optimization with Size Check
    # ========================================================================
    try:
        resp = s3.get_object(Bucket=bucket, Key=key)
        content_length = int(resp.get('ContentLength', 0))

        # Safety check: Skip emails larger than MAX_EMAIL_SIZE_MB
        if content_length > MAX_EMAIL_SIZE_BYTES:
            print(f"ERROR: Email too large ({content_length/1024/1024:.1f} MB), maximum is {MAX_EMAIL_SIZE_MB} MB")
            # Mark as processed to prevent infinite retries
            mark_object_processed(bucket, key)
            return {
                'statusCode': 413,  # Payload Too Large
                'body': json.dumps({
                    'error': 'email_too_large',
                    'size_mb': round(content_length/1024/1024, 2),
                    'max_mb': MAX_EMAIL_SIZE_MB
                })
            }

        raw_bytes = resp['Body'].read()
        print(f"Loaded {len(raw_bytes)} bytes ({len(raw_bytes)/1024:.1f} KB) from s3://{bucket}/{key}")
    except Exception as e:
        print(f"ERROR reading from S3: {e}")
        traceback.print_exc()
        return {'statusCode': 500, 'body': f'S3 read error: {str(e)}'}
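    # The ContentLength check above rejects oversized mails before the body is
    # read, so a too-large object never has to fit into Lambda memory at all.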

    parsed = parse_raw_message(raw_bytes)
    subj = parsed.get('subject', '(no subject)') if parsed else '(no subject)'
@@ -396,8 +517,8 @@ def lambda_handler(event, context):
print(f"All addresses found: {all_recipients}") print(f"All addresses found: {all_recipients}")
# Print all headers for debugging # Print all headers for debugging
print("=== All Email Headers ===") print("=== All Email Headers ===")
for key in parsed.keys(): for key_h in parsed.keys():
print(f"{key}: {parsed.get(key)}") print(f"{key_h}: {parsed.get(key_h)}")
print("=== End Headers ===") print("=== End Headers ===")
else: else:
print("ERROR: Could not parse email headers") print("ERROR: Could not parse email headers")
@@ -444,12 +565,60 @@ def lambda_handler(event, context):
    else:
        print("No successful deliveries; NOT setting processed metadata so message can be re-evaluated later.")

    # Log structured result
    result = log_processing_result(bucket, key, delivered, refused, retry_count)

    return {
        'statusCode': 200 if delivered else 500,
        'body': json.dumps(result)
    }

# ============================================================================
# LAMBDA HANDLER with TIMEOUT PROTECTION
# ============================================================================
def lambda_handler(event, context):
    """
    Lambda entry point with timeout protection.

    Recommended Lambda configuration:
    - Memory: 512 MB
    - Timeout: 60 seconds
    - Environment variables:
        - SMTP_HOST, SMTP_PORT, SMTP_USER, SMTP_PASS
        - MAX_SMTP_RETRIES=3
        - BASE_RETRY_DELAY=2
        - MAX_EMAIL_SIZE_MB=25
    """
    # Set up timeout protection (stop 5 seconds before the Lambda timeout)
    remaining_time = context.get_remaining_time_in_millis() / 1000 if context else 60
    safety_margin = 5  # seconds
    max_execution_time = max(10, remaining_time - safety_margin)

    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(int(max_execution_time))
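    # NOTE: signal.alarm() has whole-second resolution and SIGALRM is delivered
    # only to the main thread; both are fine for this single-threaded handler.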

    try:
        return _process_email(event, context)

    except TimeoutException:
        print(f"WARNING: Lambda approaching timeout after {max_execution_time}s, gracefully exiting")
        # Don't mark as processed so it can be retried
        return {
            'statusCode': 408,  # Request Timeout
            'body': json.dumps({
                'error': 'lambda_timeout',
                'execution_time': max_execution_time
            })
        }
    except Exception as e:
        print(f"FATAL ERROR in lambda_handler: {e}")
        traceback.print_exc()
        return {
            'statusCode': 500,
            'body': json.dumps({
                'error': 'internal_error',
                'message': str(e)
            })
        }

    finally:
        signal.alarm(0)  # Cancel alarm
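
# Minimal local smoke test (hypothetical bucket/key; assumes AWS credentials
# and an S3-notification-style event; not part of the deployed handler):
# if __name__ == '__main__':
#     fake_event = {'Records': [{'s3': {'bucket': {'name': 'example-com-emails'},
#                                       'object': {'key': 'incoming/msg-001'}}}]}
#     print(lambda_handler(fake_event, None))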