/// Email processing utilities for normalizing and fixing email format issues
use std::error::Error;

/// Returns `true` when `line` looks like the first line of a header field.
///
/// A header start begins with an ASCII letter and contains a colon that is preceded
/// only by field-name characters (ASCII alphanumerics, `-` or `_`).
fn is_header_start(line: &str) -> bool {
    line.starts_with(|c: char| c.is_ascii_alphabetic())
        && line.find(':').map_or(false, |colon| {
            line[..colon]
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
        })
}

/// Returns `true` when `line` looks like a MIME boundary delimiter (`--` plus at least
/// one more byte).
fn is_boundary_marker(line: &str) -> bool {
    line.len() > 2 && line.starts_with("--")
}

/// Returns `true` when `line` starts with one of the field-name prefixes that mark a
/// block as MIME part headers. Field names are compared ignoring ASCII case.
fn has_mime_header_name(line: &str) -> bool {
    const MIME_HEADER_PREFIXES: [&str; 3] = ["Content-", "MIME-Version", "X-WS-"];

    MIME_HEADER_PREFIXES.iter().any(|&prefix| {
        // `get` yields `None` when the line is shorter than the prefix or the cut would
        // split a multi-byte character, so this never panics.
        line.get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix))
    })
}

/// Decides whether a blank-line-delimited chunk of the body is a block of MIME part headers.
///
/// The chunk qualifies when it contains at least one MIME-style header (see
/// [`has_mime_header_name`]) and no line that has to be body content. Empty lines and
/// boundary markers are ignored. A line that starts with whitespace, or that follows a
/// header line, is treated as a (possibly malformed) continuation rather than body content.
fn is_mime_header_section(section: &str) -> bool {
    let mut has_mime_headers = false;
    let mut seen_header = false;

    for line in section.lines() {
        if line.is_empty() || is_boundary_marker(line) {
            continue;
        }

        if is_header_start(line) {
            seen_header = true;
            has_mime_headers |= has_mime_header_name(line);
        } else if !seen_header && !line.starts_with(char::is_whitespace) {
            // Neither a header nor a plausible continuation: this is body content.
            return false;
        }
    }

    // A MIME header can only have been seen on a header line, so this also implies
    // that the section contains at least one header.
    has_mime_headers
}

/// Normalizes a section of headers by fixing continuation lines
///
/// Lines are re-joined with `line_ending`; no line ending is appended after the last
/// line. A non-empty line that follows a header field and neither starts a new header
/// nor begins with whitespace is treated as a malformed continuation and gets a single
/// leading space. MIME boundary markers are never indented, because a boundary
/// delimiter must start at the beginning of its line; they also end the current header
/// field.
fn normalize_header_section(headers: &str, line_ending: &str) -> String {
    let mut result = String::with_capacity(headers.len());
    let mut in_header_field = false;

    for (idx, line) in headers.lines().enumerate() {
        if idx > 0 {
            result.push_str(line_ending);
        }

        if is_header_start(line) {
            in_header_field = true;
        } else if is_boundary_marker(line) {
            in_header_field = false;
        } else if in_header_field
            && !line.is_empty()
            && !line.starts_with(char::is_whitespace)
        {
            // Continuation line that lost its leading whitespace.
            result.push(' ');
        }

        result.push_str(line);
    }

    result
}

/// Normalizes email headers to ensure RFC 5322 compliance
///
/// This function fixes improperly formatted header continuation lines by ensuring
/// that continuation lines start with at least one whitespace character (space or tab).
///
/// According to RFC 5322 Section 2.2.3:
/// - Header fields can be continued on subsequent lines
/// - Continuation lines MUST begin with at least one LWSP (space or tab)
///
/// This function processes both main email headers AND MIME part headers within the body.
/// It preserves the original line endings (CRLF or LF) of the email: CRLF is used when the
/// input contains at least one CRLF, LF otherwise. Input without an empty line separating
/// headers from body is returned unchanged.
///
/// # Arguments
/// * `email` - The raw email content as a string
///
/// # Returns
/// * The normalized email with properly formatted header continuation lines
///
/// # Errors
/// The current implementation never returns `Err`; the `Result` is part of the
/// function's interface.
pub fn normalize_headers(email: &str) -> Result<String, Box<dyn Error>> {
    // Detect line ending style: CRLF (Windows/SMTP) or LF (Unix)
    let (line_ending, separator) = if email.contains("\r\n") {
        ("\r\n", "\r\n\r\n")
    } else {
        ("\n", "\n\n")
    };

    // The first empty line ends the main headers.
    let (main_headers, body) = match email.split_once(separator) {
        Some(parts) => parts,
        None => return Ok(email.to_string()),
    };

    let mut result = String::with_capacity(email.len());
    result.push_str(&normalize_header_section(main_headers, line_ending));
    result.push_str(separator);

    // Walk the body one blank-line-delimited section at a time. MIME part headers
    // appear after boundary markers and before the next empty line; every other
    // section is copied through untouched.
    let mut rest = body;
    while let Some((section, tail)) = rest.split_once(separator) {
        if is_mime_header_section(section) {
            result.push_str(&normalize_header_section(section, line_ending));
        } else {
            result.push_str(section);
        }
        result.push_str(separator);
        rest = tail;
    }

    // Whatever follows the last separator is body content.
    result.push_str(rest);

    Ok(result)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize_headers_with_proper_continuation() {
        let email = "From: test@example.com\nSubject: Test\n line 2\nTo: user@example.com\n\nBody";
        let result = normalize_headers(email).unwrap();
        assert!(result.contains("Subject: Test\n line 2\n"));
    }

    #[test]
    fn test_normalize_headers_with_missing_whitespace() {
        let email = "From: test@example.com\nSubject: Test\nline 2\nTo: user@example.com\n\nBody";
        let result = normalize_headers(email).unwrap();
        assert!(result.contains("Subject: Test\n line 2\n"));
    }

    #[test]
    fn test_normalize_headers_preserves_body() {
        let email = "From: test@example.com\nSubject: Test\n\nBody line 1\nBody line 2";
        let result = normalize_headers(email).unwrap();
        assert!(result.contains("Body line 1\nBody line 2"));
    }

    #[test]
    fn test_normalize_headers_complex_continuation() {
        let email = concat!(
            "ARC-Seal: i=1; a=rsa-sha256; t=1764789271; cv=none;\n",
            "d=google.com; s=arc-20240605;\n",
            "b=WzYePPFoiBLQx6r6obqcdcSu658wc1rT9O383Yux3i6ngaTS4Z4Jc1vKOZ128wn1rR\n",
            "To: test@example.com\n",
            "\n",
            "Body"
        );
        let result = normalize_headers(email).unwrap();
        assert!(result.contains(" d=google.com; s=arc-20240605;"));
        assert!(result.contains(" b=WzYePPFoiBLQx6r6obqcdcSu658wc1rT9O383Yux3i6ngaTS4Z4Jc1vKOZ128wn1rR"));
    }

    #[test]
    fn test_normalize_headers_preserves_crlf() {
        let email = "From: test@example.com\r\nSubject: Test\r\n\r\nBody";
        let result = normalize_headers(email).unwrap();
        assert!(result.contains("\r\n"));
        assert!(!result.contains("\n\n")); // Should not have double LF
    }

    #[test]
    fn test_normalize_headers_crlf_continuation() {
        let email = "From: test@example.com\r\nSubject: Test\r\nline 2\r\nTo: user@example.com\r\n\r\nBody";
        let result = normalize_headers(email).unwrap();
        assert!(result.contains("Subject: Test\r\n line 2\r\n"));
    }

    #[test]
    fn test_normalize_headers_no_changes_needed() {
        let email = "From: test@example.com\r\nSubject: Test\r\n line 2\r\nTo: user@example.com\r\n\r\nBody";
        let result = normalize_headers(email).unwrap();
        assert_eq!(email, result, "Email should not be modified if already compliant");
    }

    #[test]
    fn test_normalize_attachment_headers() {
        let email = concat!(
            "From: test@example.com\r\n",
            "Subject: Test\r\n",
            "\r\n",
            "--boundary\r\n",
            "X-WS-Attachment-UUID: 123\r\n",
            "Content-Type: application/pdf;\r\n",
            "name=test.pdf\r\n",
            "Content-Disposition: attachment;\r\n",
            "filename=test.pdf\r\n",
            "\r\n",
            "data"
        );
        let result = normalize_headers(email).unwrap();
        assert!(
            result.contains("Content-Type: application/pdf;\r\n name=test.pdf"),
            "Should add space to Content-Type continuation"
        );
        assert!(
            result.contains("Content-Disposition: attachment;\r\n filename=test.pdf"),
            "Should add space to Content-Disposition continuation"
        );
    }
}