feat: normalize emails

This commit is contained in:
2025-12-04 11:59:30 +01:00
parent 4482c4041e
commit 610d10fd1e
12 changed files with 683 additions and 18 deletions

42
CHANGELOG.md Normal file
View File

@@ -0,0 +1,42 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.2.0] - 2025-12-04
### Added
- Email header normalization to ensure RFC 5322 compliance
- New `normalize-imap` binary tool to fix existing malformed emails in IMAP inbox
- IMAP fetch functionality to retrieve all messages from inbox
- IMAP delete and expunge functionality for message management
- Dry-run mode for the normalization tool (`--dry-run` flag)
- Confirmation prompts with skip option (`--yes` flag)
- Comprehensive email header continuation line fixing
- Analysis and reporting of which emails need normalization
### Changed
- Refactored project structure to use library + multiple binaries
- Main binary renamed to `pop-to-imap` for clarity
- All modules made public for library usage
- Updated README with detailed usage instructions for both tools
### Fixed
- Email headers with missing whitespace on continuation lines now properly formatted
- RFC 5322 Section 2.2.3 compliance for header folding
## [0.1.0] - 2025-12-03
### Added
- Initial release
- POP3 to IMAP email migration
- TLS/SSL support for secure connections
- Environment-based configuration via .env files
- Docker support with multi-stage builds
- Basic error handling and logging

2
Cargo.lock generated
View File

@@ -597,7 +597,7 @@ dependencies = [
[[package]]
name = "rs_pop_imap_importer"
version = "0.1.0"
version = "0.3.0"
dependencies = [
"clap",
"dotenvy",

View File

@@ -1,8 +1,20 @@
[package]
name = "rs_pop_imap_importer"
version = "0.1.0"
version = "0.3.0"
edition = "2024"
[lib]
name = "rs_pop_imap_importer"
path = "src/lib.rs"
[[bin]]
name = "pop-to-imap"
path = "src/main.rs"
[[bin]]
name = "normalize-imap"
path = "src/bin/normalize_imap.rs"
[dependencies]
clap = { version = "4.5.53", features = ["derive"] }
dotenvy = "0.15.7"

View File

@@ -1,4 +1,4 @@
# Makefile for building and running the POP3 to IMAP Importer Docker image
# Makefile for building and running the POP3 to IMAP Importer
# Variables
IMAGE_NAME = pop-imap-importer
@@ -12,6 +12,29 @@ help: ## Show this help message
@echo "Targets:"
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
# Local build targets
.PHONY: build-local
build-local: ## Build both binaries locally (release mode)
cargo build --release
.PHONY: pop-to-imap
pop-to-imap: ## Run the POP3 to IMAP migration tool
cargo run --release --bin pop-to-imap
.PHONY: normalize-imap
normalize-imap: ## Run the IMAP normalization tool
cargo run --release --bin normalize-imap
.PHONY: normalize-dry-run
normalize-dry-run: ## Run the IMAP normalization tool in dry-run mode
cargo run --release --bin normalize-imap -- --dry-run
.PHONY: test
test: ## Run all tests
cargo test
# Docker targets
.PHONY: build
build: ## Build the Docker image for linux/amd64 platform
docker build --platform=$(DOCKER_PLATFORM) -t $(IMAGE_NAME) .

View File

@@ -4,10 +4,14 @@ A Rust application that downloads emails from a POP3 server and imports them int
## Features
- Downloads all emails from a POP3 server
- Imports emails to the INBOX of an IMAP server
- **POP3 to IMAP Migration**: Downloads all emails from a POP3 server and imports them to IMAP
- **IMAP Email Normalization**: Fix existing malformed emails already in your IMAP inbox
- Secure TLS connections
- Environment-based configuration
- Automatic email header normalization (RFC 5322 compliance)
- Fixes improperly formatted header continuation lines
- Ensures continuation lines start with proper whitespace
- Safe operation with dry-run mode and confirmation prompts
## Setup
@@ -35,8 +39,35 @@ A Rust application that downloads emails from a POP3 server and imports them int
## Usage
Run the application:
### POP3 to IMAP Migration
Run the main importer to migrate emails from POP3 to IMAP:
```bash
cargo run
cargo run --bin pop-to-imap
# or in release mode
cargo run --release --bin pop-to-imap
```
### Normalize Existing IMAP Emails
If you already have malformed emails in your IMAP inbox, use the normalization tool:
```bash
# Dry run to see what would be changed
cargo run --release --bin normalize-imap -- --dry-run
# Actually normalize the emails (will prompt for confirmation)
cargo run --release --bin normalize-imap
# Skip confirmation prompt
cargo run --release --bin normalize-imap -- --yes
```
**Note:** The normalize-imap tool will:
1. Fetch all emails from your IMAP INBOX
2. Analyze which emails have malformed headers
3. Show you a summary of what needs to be fixed
4. Delete and re-import only the emails that need normalization
5. Keep emails that are already RFC 5322 compliant unchanged

58
src/bin/fetch_email.rs Normal file
View File

@@ -0,0 +1,58 @@
use clap::Parser;
use rs_pop_imap_importer::{config::Settings, imap_client::ImapClient};
use std::fs;
/// Fetch a specific email from IMAP server by UID
#[derive(Parser, Debug)]
#[clap(version, about, long_about = None)]
struct Args {
/// Path to the .env file containing server configurations
#[clap(short, long, default_value = ".env")]
env_file: String,
/// UID of the email to fetch
#[clap(short, long)]
uid: u32,
/// Output file path (optional, otherwise prints to stdout)
#[clap(short, long)]
output: Option<String>,
}
// Entry point: connects to the configured IMAP server, fetches the single
// message whose UID was given on the command line and either saves it to the
// requested file or prints it to stdout.
//
// Only the requested message is downloaded (via `fetch_message_by_uid`)
// instead of the whole INBOX, and the process exits with status 1 when the UID
// does not exist so that scripts can detect the failure.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();

    println!("Connecting to IMAP server...");
    let settings = Settings::from_env_file(&args.env_file)?;
    let mut imap_client = ImapClient::new(&settings.imap)?;
    imap_client.login(&settings.imap)?;
    imap_client.select_inbox()?;

    println!("Fetching message with UID {}...", args.uid);
    let found = match imap_client.fetch_message_by_uid(args.uid)? {
        Some(content) => {
            println!("Found message with UID {}", args.uid);
            println!("Size: {} bytes", content.len());
            match args.output {
                Some(output_path) => {
                    fs::write(&output_path, &content)?;
                    println!("Saved to: {}", output_path);
                }
                None => {
                    println!("\n--- Email Content ---\n");
                    print!("{}", content);
                }
            }
            true
        }
        None => {
            eprintln!("Error: Email with UID {} not found", args.uid);
            false
        }
    };

    // Close the session on both paths before deciding the exit status.
    imap_client.logout()?;

    if !found {
        // The error was already reported above; exit non-zero without having
        // `main` print it a second time.
        std::process::exit(1);
    }
    Ok(())
}

32
src/bin/list_uids.rs Normal file
View File

@@ -0,0 +1,32 @@
use clap::Parser;
use rs_pop_imap_importer::{config::Settings, imap_client::ImapClient};
/// List all email UIDs in IMAP inbox
#[derive(Parser, Debug)]
#[clap(version, about, long_about = None)]
struct Args {
/// Path to the .env file containing server configurations
#[clap(short, long, default_value = ".env")]
env_file: String,
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = Args::parse();
let settings = Settings::from_env_file(&args.env_file)?;
let mut imap_client = ImapClient::new(&settings.imap)?;
imap_client.login(&settings.imap)?;
imap_client.select_inbox()?;
println!("Fetching all messages...");
let messages = imap_client.fetch_all_messages()?;
println!("\nFound {} messages:", messages.len());
println!("UIDs:");
for (uid, _) in messages {
println!(" {}", uid);
}
imap_client.logout()?;
Ok(())
}

178
src/bin/normalize_imap.rs Normal file
View File

@@ -0,0 +1,178 @@
use clap::Parser;
use rs_pop_imap_importer::{config::Settings, imap_client::ImapClient, normalize_headers};
/// IMAP Email Normalizer
///
/// This utility fetches emails from an IMAP server, normalizes their headers
/// to ensure RFC 5322 compliance, and re-imports them.
#[derive(Parser, Debug)]
#[clap(version, about, long_about = None)]
struct Args {
/// Path to the .env file containing server configurations
#[clap(short, long, default_value = ".env")]
env_file: String,
/// Perform a dry run without making changes
#[clap(short, long)]
dry_run: bool,
/// Skip confirmation prompt
#[clap(short, long)]
yes: bool,
}
// Entry point of the normalization tool.
//
// Workflow:
//   1. Connect to the IMAP server and fetch every message in the INBOX.
//   2. Run `normalize_headers` on each message and remember only those whose
//      content actually changes.
//   3. Report the analysis; stop here in dry-run mode or when nothing changes.
//   4. Ask for confirmation unless `--yes` was given.
//   5. For each affected message, append the normalized copy FIRST and flag
//      the original as deleted only after the append succeeded, so a failed
//      re-import can never lose a message when the mailbox is expunged.
//   6. Expunge (only if at least one message was replaced) and print a summary.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();

    println!("Starting IMAP email header normalization tool...");

    // Load configuration from specified .env file
    let settings = Settings::from_env_file(&args.env_file)?;

    // Connect to IMAP server
    println!("Connecting to IMAP server at {}:{}...", settings.imap.host, settings.imap.port);
    let mut imap_client = ImapClient::new(&settings.imap)?;
    imap_client.login(&settings.imap)?;
    imap_client.select_inbox()?;
    println!("Successfully connected to IMAP server");

    // Fetch all messages
    println!("Fetching all messages from INBOX...");
    let messages = imap_client.fetch_all_messages()?;
    println!("Found {} messages in INBOX", messages.len());

    if messages.is_empty() {
        println!("No messages to process. Exiting.");
        imap_client.logout()?;
        return Ok(());
    }

    // Analyze messages; keep (uid, normalized content) only for the messages
    // that normalization really changes. Compliant messages and messages that
    // failed to normalize are left untouched on the server.
    let mut pending: Vec<(u32, String)> = Vec::new();
    for (msg_id, content) in &messages {
        match normalize_headers(content) {
            Ok(normalized) if normalized != *content => {
                if args.dry_run {
                    println!("\nMessage {} needs normalization:", msg_id);
                    // Show the first line that differs between both versions.
                    let first_difference = content
                        .lines()
                        .zip(normalized.lines())
                        .enumerate()
                        .find(|(_, (before, after))| before != after);
                    if let Some((idx, (before, after))) = first_difference {
                        println!(" Line {}: Missing whitespace on header continuation", idx + 1);
                        println!(" Before: {:?}", before);
                        println!(" After: {:?}", after);
                    }
                }
                pending.push((*msg_id, normalized));
            }
            Ok(_) => {}
            Err(e) => {
                eprintln!("Warning: Failed to normalize message {}: {}", msg_id, e);
            }
        }
    }
    let needs_normalization = pending.len();

    println!("\nAnalysis complete:");
    println!(" Total messages: {}", messages.len());
    println!(" Messages needing normalization: {}", needs_normalization);
    println!(" Messages already compliant: {}", messages.len() - needs_normalization);

    if needs_normalization == 0 {
        println!("\nAll messages are already RFC 5322 compliant. No changes needed.");
        imap_client.logout()?;
        return Ok(());
    }

    if args.dry_run {
        println!("\nDry run mode - no changes will be made.");
        println!("\nTo normalize these messages, run without --dry-run flag.");
        imap_client.logout()?;
        return Ok(());
    }

    // Confirmation prompt
    if !args.yes {
        println!("\nWARNING: This operation will:");
        println!(" 1. Delete {} messages with malformed headers from INBOX", needs_normalization);
        println!(" 2. Re-import them with normalized headers");
        println!("\nThis operation cannot be undone!");
        println!("\nDo you want to proceed? (yes/no)");

        let mut input = String::new();
        std::io::stdin().read_line(&mut input)?;
        let input = input.trim().to_lowercase();
        if input != "yes" && input != "y" {
            println!("Operation cancelled.");
            imap_client.logout()?;
            return Ok(());
        }
    }

    println!("\nStarting normalization process...");

    let mut processed = 0;
    let mut errors = 0;
    for (msg_id, normalized) in &pending {
        print!("Processing message {} ({}/{})... ", msg_id, processed + 1, needs_normalization);
        // `print!` does not end the line, so push the progress text out now;
        // a failed flush only affects progress display and is ignored.
        let _ = std::io::Write::flush(&mut std::io::stdout());

        // Re-import first: if this fails the original stays untouched.
        if let Err(e) = imap_client.append_message(normalized) {
            eprintln!("✗ failed to re-import: {}", e);
            errors += 1;
            continue;
        }

        // The normalized copy is stored; now the original can be flagged.
        // A failure here leaves a duplicate behind but never loses mail.
        match imap_client.delete_message(*msg_id) {
            Ok(_) => {
                println!("✓ normalized");
                processed += 1;
            }
            Err(e) => {
                eprintln!("✗ failed to delete: {}", e);
                errors += 1;
            }
        }
    }

    // Expunge only when this run actually flagged something.
    if processed > 0 {
        println!("\nExpunging deleted messages...");
        imap_client.expunge()?;
    }

    // Summary
    println!("\n=== Normalization Summary ===");
    println!("Successfully processed: {}", processed);
    if errors > 0 {
        println!("Errors encountered: {}", errors);
    }
    println!("Operation completed!");

    // Clean up
    imap_client.logout()?;
    Ok(())
}

237
src/email_processor.rs Normal file
View File

@@ -0,0 +1,237 @@
//! Email processing utilities for normalizing and fixing email format issues.
use std::error::Error;
/// Returns `true` when `line` starts a new header field.
///
/// The line must begin with an ASCII letter and everything before its first
/// colon must be ASCII alphanumeric, `-` or `_`. This is narrower than the
/// RFC 5322 field-name grammar; lines that do not match are treated by the
/// callers as continuation text or body content.
fn is_header_start(line: &str) -> bool {
    line.starts_with(|c: char| c.is_ascii_alphabetic())
        && line.split_once(':').is_some_and(|(name, _)| {
            name.chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
        })
}

/// Returns `true` when `line` begins with folding whitespace.
///
/// RFC 5322 only allows SP and HTAB here, so other Unicode whitespace
/// (for example a no-break space) does not count.
fn starts_with_wsp(line: &str) -> bool {
    matches!(line.as_bytes().first(), Some(b' ' | b'\t'))
}

/// Returns `true` when `line` starts with `prefix`, ignoring ASCII case
/// (header field names are case-insensitive).
fn has_prefix_ignore_ascii_case(line: &str, prefix: &str) -> bool {
    line.as_bytes()
        .get(..prefix.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
}

/// Locates the blank line that terminates the main header block.
///
/// Both `"\r\n\r\n"` and `"\n\n"` are searched and the one that occurs first
/// wins, so a stray CRLF pair somewhere in the body of an LF message cannot
/// move the header boundary into the body.
///
/// Returns `(position, line_ending, separator)` or `None` when the message
/// contains no blank line at all.
fn find_header_end(email: &str) -> Option<(usize, &'static str, &'static str)> {
    let crlf = email.find("\r\n\r\n").map(|pos| (pos, "\r\n", "\r\n\r\n"));
    let lf = email.find("\n\n").map(|pos| (pos, "\n", "\n\n"));
    match (crlf, lf) {
        (Some(c), Some(l)) => Some(if c.0 < l.0 { c } else { l }),
        (c, l) => c.or(l),
    }
}

/// Decides whether `section` (the text between two blank lines of the body)
/// consists solely of MIME part headers.
///
/// Empty lines and boundary-like lines (`--` followed by at least one more
/// character) are ignored. The section qualifies when it contains at least one
/// `Content-*` or `MIME-Version` header and no line that is body content, i.e.
/// a line that is neither a header start, nor indented, nor directly following
/// a header (a possibly unfolded continuation).
fn is_mime_header_section(section: &str) -> bool {
    let mut has_mime_headers = false;
    let mut last_was_header = false;

    for line in section.lines() {
        if line.is_empty() || (line.starts_with("--") && line.len() > 2) {
            continue;
        }
        if is_header_start(line) {
            last_was_header = true;
            if has_prefix_ignore_ascii_case(line, "Content-")
                || has_prefix_ignore_ascii_case(line, "MIME-Version")
            {
                has_mime_headers = true;
            }
        } else if !(starts_with_wsp(line) || last_was_header) {
            // Plain body text: this section must not be rewritten.
            return false;
        }
    }
    has_mime_headers
}

/// Normalizes a section of headers by fixing continuation lines.
///
/// Every non-empty line that follows a header field, is not itself a header
/// start and does not begin with SP/HTAB gets a single space prepended. Lines
/// that appear before the first header field (for example a MIME boundary
/// marker) are copied unchanged. The lines are re-joined with `line_ending`;
/// as with `str::lines`, a trailing line terminator is not reproduced.
fn normalize_header_section(headers: &str, line_ending: &str) -> String {
    let mut result = String::with_capacity(headers.len() + 16);
    let mut seen_header = false;

    for (idx, line) in headers.lines().enumerate() {
        if idx > 0 {
            result.push_str(line_ending);
        }
        if is_header_start(line) {
            seen_header = true;
        } else if seen_header && !line.is_empty() && !starts_with_wsp(line) {
            // Unfolded continuation line: restore the mandatory whitespace.
            result.push(' ');
        }
        result.push_str(line);
    }
    result
}

/// Normalizes email headers to ensure RFC 5322 compliance
///
/// This function fixes improperly formatted header continuation lines by ensuring
/// that continuation lines start with at least one whitespace character (space or tab),
/// as required by RFC 5322 Section 2.2.3.
///
/// Both the main header block and MIME part header blocks inside the body are
/// processed. The line-ending style (CRLF or LF) is taken from the first blank
/// line of the message, i.e. the one that ends the main headers. Body sections
/// that are not recognised as MIME part headers are copied byte-for-byte.
///
/// # Arguments
/// * `email` - The raw email content as a string
///
/// # Returns
/// * The normalized email with properly formatted header continuation lines.
///   A message without any blank line is returned unchanged.
///
/// # Errors
/// The current implementation never returns `Err`; the `Result` is kept for
/// interface stability.
pub fn normalize_headers(email: &str) -> Result<String, Box<dyn Error>> {
    let Some((main_headers_end, line_ending, separator)) = find_header_end(email) else {
        return Ok(email.to_string());
    };

    let mut result = String::with_capacity(email.len() + 16);
    result.push_str(&normalize_header_section(
        &email[..main_headers_end],
        line_ending,
    ));
    result.push_str(separator);

    // Walk the body one blank-line-delimited section at a time.
    let mut rest = &email[main_headers_end + separator.len()..];
    while let Some(sep_pos) = rest.find(separator) {
        let (section, tail) = rest.split_at(sep_pos);
        if is_mime_header_section(section) {
            result.push_str(&normalize_header_section(section, line_ending));
        } else {
            result.push_str(section);
        }
        result.push_str(separator);
        rest = &tail[separator.len()..];
    }
    // Whatever follows the last blank line is copied as-is.
    result.push_str(rest);

    Ok(result)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the normalizer on `raw` and unwraps the result.
    fn normalized(raw: &str) -> String {
        normalize_headers(raw).unwrap()
    }

    /// A correctly folded header keeps its continuation line.
    #[test]
    fn test_normalize_headers_with_proper_continuation() {
        let output = normalized(
            "From: test@example.com\nSubject: Test\n line 2\nTo: user@example.com\n\nBody",
        );
        assert!(output.contains("Subject: Test\n line 2\n"));
    }

    /// A continuation line without leading whitespace gets a space prepended.
    #[test]
    fn test_normalize_headers_with_missing_whitespace() {
        let output = normalized(
            "From: test@example.com\nSubject: Test\nline 2\nTo: user@example.com\n\nBody",
        );
        assert!(output.contains("Subject: Test\n line 2\n"));
    }

    /// Plain body text after the blank line is carried over.
    #[test]
    fn test_normalize_headers_preserves_body() {
        let output = normalized("From: test@example.com\nSubject: Test\n\nBody line 1\nBody line 2");
        assert!(output.contains("Body line 1\nBody line 2"));
    }

    /// Several unfolded continuation lines in a row are all repaired.
    #[test]
    fn test_normalize_headers_complex_continuation() {
        let raw = concat!(
            "ARC-Seal: i=1; a=rsa-sha256; t=1764789271; cv=none;\n",
            "d=google.com; s=arc-20240605;\n",
            "b=WzYePPFoiBLQx6r6obqcdcSu658wc1rT9O383Yux3i6ngaTS4Z4Jc1vKOZ128wn1rR\n",
            "To: test@example.com\n",
            "\n",
            "Body"
        );
        let output = normalized(raw);
        assert!(output.contains(" d=google.com; s=arc-20240605;"));
        assert!(output.contains(" b=WzYePPFoiBLQx6r6obqcdcSu658wc1rT9O383Yux3i6ngaTS4Z4Jc1vKOZ128wn1rR"));
    }

    /// CRLF input stays CRLF and no bare double LF is introduced.
    #[test]
    fn test_normalize_headers_preserves_crlf() {
        let output = normalized("From: test@example.com\r\nSubject: Test\r\n\r\nBody");
        assert!(output.contains("\r\n"));
        assert!(!output.contains("\n\n")); // Should not have double LF
    }

    /// Unfolded continuation lines are repaired in CRLF messages too.
    #[test]
    fn test_normalize_headers_crlf_continuation() {
        let output = normalized(
            "From: test@example.com\r\nSubject: Test\r\nline 2\r\nTo: user@example.com\r\n\r\nBody",
        );
        assert!(output.contains("Subject: Test\r\n line 2\r\n"));
    }

    /// An already compliant message comes back byte-for-byte identical.
    #[test]
    fn test_normalize_headers_no_changes_needed() {
        let raw = "From: test@example.com\r\nSubject: Test\r\n line 2\r\nTo: user@example.com\r\n\r\nBody";
        let output = normalized(raw);
        assert_eq!(raw, output, "Email should not be modified if already compliant");
    }
}

View File

@@ -40,4 +40,52 @@ impl ImapClient {
}
Ok(())
}
/// Fetches every message of the currently selected mailbox.
///
/// Returns one `(uid, content)` pair per message that carried a body. The raw
/// bytes are converted with `String::from_utf8_lossy`, so byte sequences that
/// are not valid UTF-8 are replaced by U+FFFD in the returned text. When no
/// session is present an empty list is returned.
///
/// The messages are requested with `BODY.PEEK[]` rather than `RFC822` so that
/// reading them does not implicitly set the `\Seen` flag on the server.
///
/// # Errors
/// Returns an error when the UID FETCH command fails or when a fetched message
/// carries no UID.
pub fn fetch_all_messages(&mut self) -> Result<Vec<(u32, String)>, Box<dyn std::error::Error>> {
    let mut messages = Vec::new();

    if let Some(ref mut session) = self.session {
        // Fetch all messages using UID FETCH command (without marking them read)
        let message_stream = session.uid_fetch("1:*", "BODY.PEEK[]")?;

        for msg in message_stream.iter() {
            let uid = msg.uid.ok_or("Message missing UID")?;
            if let Some(body) = msg.body() {
                messages.push((uid, String::from_utf8_lossy(body).into_owned()));
            }
        }
    }

    Ok(messages)
}
/// Fetches the message with the given UID from the currently selected mailbox.
///
/// Returns `Ok(Some(content))` for the first fetched item that carries a body
/// and `Ok(None)` when nothing with a body is returned or no session is
/// present. The raw bytes are converted with `String::from_utf8_lossy`, so
/// invalid UTF-8 is replaced by U+FFFD.
///
/// The message is requested with `BODY.PEEK[]` rather than `RFC822` so that
/// reading it does not implicitly set the `\Seen` flag on the server.
///
/// # Errors
/// Returns an error when the UID FETCH command fails.
pub fn fetch_message_by_uid(&mut self, uid: u32) -> Result<Option<String>, Box<dyn std::error::Error>> {
    if let Some(ref mut session) = self.session {
        let message_stream = session.uid_fetch(uid.to_string(), "BODY.PEEK[]")?;

        for msg in message_stream.iter() {
            if let Some(body) = msg.body() {
                return Ok(Some(String::from_utf8_lossy(body).into_owned()));
            }
        }
    }

    Ok(None)
}
pub fn delete_message(&mut self, uid: u32) -> Result<(), Box<dyn std::error::Error>> {
if let Some(ref mut session) = self.session {
// Mark message as deleted using UID STORE command
session.uid_store(format!("{}", uid), "+FLAGS (\\Deleted)")?;
}
Ok(())
}
pub fn expunge(&mut self) -> Result<(), Box<dyn std::error::Error>> {
if let Some(ref mut session) = self.session {
// Permanently remove messages marked as deleted
session.expunge()?;
}
Ok(())
}
}

7
src/lib.rs Normal file
View File

@@ -0,0 +1,7 @@
pub mod config;
pub mod pop3_client;
pub mod imap_client;
pub mod email_processor;
// Re-export the normalize_headers function for external use
pub use email_processor::normalize_headers;

View File

@@ -1,11 +1,5 @@
mod config;
mod pop3_client;
mod imap_client;
use clap::Parser;
use config::Settings;
use pop3_client::Pop3Client;
use imap_client::ImapClient;
use rs_pop_imap_importer::{config::Settings, pop3_client::Pop3Client, imap_client::ImapClient, normalize_headers};
/// POP3 to IMAP Email Importer
///
@@ -46,14 +40,17 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// Process each message
for (msg_id, _) in messages {
println!("Processing message ID: {}", msg_id);
// Retrieve message content
let message_content = pop3_client.retrieve_message(msg_id)?;
// Normalize email headers to ensure RFC 5322 compliance
let normalized_content = normalize_headers(&message_content)?;
// Append message to IMAP inbox
imap_client.append_message(&message_content)?;
imap_client.append_message(&normalized_content)?;
println!("Message {} imported successfully", msg_id);
// Optionally delete message from POP3 server after successful import
// pop3_client.delete_message(msg_id)?;
}