e0b0c5e964
Detect X-WS-* headers (e.g., X-WS-Attachment-UUID) as MIME headers so that attachment headers such as Content-Type and Content-Disposition are normalized properly. Add a test case for attachment header normalization. Bump version to 0.5.0. The normalization fixes malformed continuation lines in attachment headers generated by Infomaniak webmail, where lines like "name=file.pdf" are missing the required leading whitespace.
260 lines
10 KiB
Rust
260 lines
10 KiB
Rust
//! Email processing utilities for normalizing and fixing email format issues.
|
|
use std::error::Error;
|
|
|
|
/// Normalizes a section of headers by fixing continuation lines.
///
/// The input is split with [`str::lines`], so both `\n` and `\r\n` terminators are
/// accepted; the output lines are re-joined with `line_ending`. No terminator is written
/// after the final line.
///
/// Every line is classified as one of:
///
/// * **Header start** - begins with an ASCII letter and contains a `:` that is preceded
///   only by ASCII alphanumerics, `-` or `_`. Copied unchanged.
/// * **MIME boundary** - begins with `--` and is longer than two bytes. Copied unchanged,
///   and it ends the current header: a boundary is never a folded header value, so
///   indenting it (or the line after it) would corrupt the multipart structure.
/// * **Continuation** - any other line that follows a header. If it is non-empty and does
///   not already start with whitespace, a single space is prepended.
/// * **Anything else** - copied unchanged.
///
/// # Arguments
/// * `headers` - The header lines to process, without the terminating blank line
/// * `line_ending` - The terminator (`"\n"` or `"\r\n"`) used to join the output lines
///
/// # Returns
/// * The section with every continuation line starting with whitespace
fn normalize_header_section(headers: &str, line_ending: &str) -> String {
    let mut result = String::with_capacity(headers.len());
    let mut previous_line_was_header = false;

    for (idx, line) in headers.lines().enumerate() {
        // Terminators go *between* lines so the last line never gains one.
        if idx > 0 {
            result.push_str(line_ending);
        }

        // Field name check: slicing at the colon is safe because ':' is a single ASCII byte.
        let is_header_start = line.chars().next().map_or(false, |c| c.is_ascii_alphabetic())
            && line.find(':').map_or(false, |pos| {
                line[..pos]
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
            });
        let is_boundary = line.starts_with("--") && line.len() > 2;

        if is_header_start {
            previous_line_was_header = true;
        } else if is_boundary {
            // Leave the delimiter untouched and stop treating following lines as
            // continuations of whatever header-looking line came before it.
            previous_line_was_header = false;
        } else if previous_line_was_header
            && !line.is_empty()
            && !line.chars().next().map_or(false, |c| c.is_whitespace())
        {
            // Malformed continuation: add the missing leading whitespace.
            result.push(' ');
        }

        result.push_str(line);
    }

    result
}
|
|
|
|
/// Normalizes email headers to ensure RFC 5322 compliance
|
|
///
|
|
/// This function fixes improperly formatted header continuation lines by ensuring
|
|
/// that continuation lines start with at least one whitespace character (space or tab).
|
|
///
|
|
/// According to RFC 5322 Section 2.2.3:
|
|
/// - Header fields can be continued on subsequent lines
|
|
/// - Continuation lines MUST begin with at least one LWSP (space or tab)
|
|
///
|
|
/// This function processes both main email headers AND MIME part headers within the body.
|
|
/// It preserves the original line endings (CRLF or LF) of the email.
|
|
///
|
|
/// # Arguments
|
|
/// * `email` - The raw email content as a string
|
|
///
|
|
/// # Returns
|
|
/// * The normalized email with properly formatted header continuation lines
|
|
pub fn normalize_headers(email: &str) -> Result<String, Box<dyn Error>> {
|
|
// Detect line ending style: CRLF (Windows/SMTP) or LF (Unix)
|
|
let line_ending = if email.contains("\r\n") { "\r\n" } else { "\n" };
|
|
let separator = if line_ending == "\r\n" { "\r\n\r\n" } else { "\n\n" };
|
|
|
|
// Find the end of main headers
|
|
let main_headers_end = match email.find(separator) {
|
|
Some(pos) => pos,
|
|
None => return Ok(email.to_string()),
|
|
};
|
|
|
|
// Process main headers
|
|
let main_headers = &email[..main_headers_end];
|
|
let normalized_main_headers = normalize_header_section(main_headers, line_ending);
|
|
|
|
// Process the body - look for MIME part headers
|
|
let body_start = main_headers_end + separator.len();
|
|
let body = &email[body_start..];
|
|
|
|
let mut result = normalized_main_headers;
|
|
result.push_str(separator);
|
|
|
|
// Process body, looking for MIME part headers
|
|
// MIME part headers appear after boundary markers and before the next empty line
|
|
let mut current_pos = 0;
|
|
|
|
while current_pos < body.len() {
|
|
// Look for next empty line (potential MIME part header separator)
|
|
if let Some(next_sep_pos) = body[current_pos..].find(separator) {
|
|
let absolute_sep_pos = current_pos + next_sep_pos;
|
|
let section_before = &body[current_pos..absolute_sep_pos];
|
|
|
|
// Check if this section is MIME part headers:
|
|
// - Must contain at least one header line
|
|
// - MIME part headers typically include Content-Type, Content-Transfer-Encoding, etc.
|
|
// - Should NOT be mixed with body content (HTML, text, etc.)
|
|
|
|
let lines: Vec<&str> = section_before.lines().collect();
|
|
let mut header_count = 0;
|
|
let mut non_header_count = 0;
|
|
let mut has_mime_headers = false;
|
|
|
|
let mut last_was_header = false;
|
|
|
|
for line in &lines {
|
|
if line.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
// Check if this is a MIME boundary marker
|
|
if line.starts_with("--") && line.len() > 2 {
|
|
continue; // Skip boundary markers in the analysis
|
|
}
|
|
|
|
// Check if this is a header start line
|
|
let is_header_start = line.chars().next().map_or(false, |c| c.is_ascii_alphabetic())
|
|
&& line.find(':').map_or(false, |pos| {
|
|
line[..pos].chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
|
|
});
|
|
|
|
// Check if this is a continuation line (starts with whitespace)
|
|
let is_continuation = line.chars().next().map_or(false, |c| c.is_whitespace());
|
|
|
|
if is_header_start {
|
|
header_count += 1;
|
|
last_was_header = true;
|
|
// Check for typical MIME headers
|
|
if line.starts_with("Content-") || line.starts_with("MIME-Version") || line.starts_with("X-WS-") {
|
|
has_mime_headers = true;
|
|
}
|
|
} else if is_continuation || last_was_header {
|
|
// This is either a proper continuation line OR a line following a header
|
|
// (which might be a malformed continuation line missing whitespace)
|
|
// In either case, don't count it as body content
|
|
continue;
|
|
} else {
|
|
// Not a header, not a continuation - this is body content
|
|
non_header_count += 1;
|
|
last_was_header = false;
|
|
}
|
|
}
|
|
|
|
// Only normalize if this section contains MIME headers and no body content
|
|
// (boundary markers are OK and expected)
|
|
if header_count > 0 && has_mime_headers && non_header_count == 0 {
|
|
let normalized_section = normalize_header_section(section_before, line_ending);
|
|
result.push_str(&normalized_section);
|
|
result.push_str(separator);
|
|
current_pos = absolute_sep_pos + separator.len();
|
|
} else {
|
|
// Not MIME headers, copy as-is
|
|
result.push_str(&body[current_pos..absolute_sep_pos + separator.len()]);
|
|
current_pos = absolute_sep_pos + separator.len();
|
|
}
|
|
} else {
|
|
// No more separators, copy rest of body as-is
|
|
result.push_str(&body[current_pos..]);
|
|
break;
|
|
}
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the normalizer on `raw` and unwraps the result.
    fn normalized(raw: &str) -> String {
        normalize_headers(raw).unwrap()
    }

    /// A correctly folded header must come out exactly as it went in.
    #[test]
    fn test_normalize_headers_with_proper_continuation() {
        let output =
            normalized("From: test@example.com\nSubject: Test\n line 2\nTo: user@example.com\n\nBody");
        assert!(output.contains("Subject: Test\n line 2\n"));
    }

    /// A continuation line without leading whitespace gains a single space.
    #[test]
    fn test_normalize_headers_with_missing_whitespace() {
        let output =
            normalized("From: test@example.com\nSubject: Test\nline 2\nTo: user@example.com\n\nBody");
        assert!(output.contains("Subject: Test\n line 2\n"));
    }

    /// Plain body text after the header block is left alone.
    #[test]
    fn test_normalize_headers_preserves_body() {
        let output = normalized("From: test@example.com\nSubject: Test\n\nBody line 1\nBody line 2");
        assert!(output.contains("Body line 1\nBody line 2"));
    }

    /// Several consecutive malformed continuation lines are all repaired.
    #[test]
    fn test_normalize_headers_complex_continuation() {
        let raw = concat!(
            "ARC-Seal: i=1; a=rsa-sha256; t=1764789271; cv=none;\n",
            "d=google.com; s=arc-20240605;\n",
            "b=WzYePPFoiBLQx6r6obqcdcSu658wc1rT9O383Yux3i6ngaTS4Z4Jc1vKOZ128wn1rR\n",
            "To: test@example.com\n",
            "\n",
            "Body"
        );
        let output = normalized(raw);
        assert!(output.contains(" d=google.com; s=arc-20240605;"));
        assert!(output.contains(" b=WzYePPFoiBLQx6r6obqcdcSu658wc1rT9O383Yux3i6ngaTS4Z4Jc1vKOZ128wn1rR"));
    }

    /// CRLF input stays CRLF and never degrades to bare LF pairs.
    #[test]
    fn test_normalize_headers_preserves_crlf() {
        let output = normalized("From: test@example.com\r\nSubject: Test\r\n\r\nBody");
        assert!(output.contains("\r\n"));
        assert!(!output.contains("\n\n")); // Should not have double LF
    }

    /// Malformed continuation lines are repaired in CRLF messages as well.
    #[test]
    fn test_normalize_headers_crlf_continuation() {
        let output = normalized(
            "From: test@example.com\r\nSubject: Test\r\nline 2\r\nTo: user@example.com\r\n\r\nBody",
        );
        assert!(output.contains("Subject: Test\r\n line 2\r\n"));
    }

    /// An already compliant message must round-trip byte for byte.
    #[test]
    fn test_normalize_headers_no_changes_needed() {
        let raw =
            "From: test@example.com\r\nSubject: Test\r\n line 2\r\nTo: user@example.com\r\n\r\nBody";
        let output = normalized(raw);
        assert_eq!(raw, output, "Email should not be modified if already compliant");
    }

    /// Attachment part headers announced by an `X-WS-*` header get their continuation
    /// lines repaired too.
    #[test]
    fn test_normalize_attachment_headers() {
        let raw = concat!(
            "From: test@example.com\r\n",
            "Subject: Test\r\n",
            "\r\n",
            "--boundary\r\n",
            "X-WS-Attachment-UUID: 123\r\n",
            "Content-Type: application/pdf;\r\n",
            "name=test.pdf\r\n",
            "Content-Disposition: attachment;\r\n",
            "filename=test.pdf\r\n",
            "\r\n",
            "data"
        );
        let output = normalized(raw);
        assert!(
            output.contains("Content-Type: application/pdf;\r\n name=test.pdf"),
            "Should add space to Content-Type continuation"
        );
        assert!(
            output.contains("Content-Disposition: attachment;\r\n filename=test.pdf"),
            "Should add space to Content-Disposition continuation"
        );
    }
}
|