diff --git a/crates/mail-mcp/src/config.rs b/crates/mail-mcp/src/config.rs index e04ece9..de9ba0e 100644 --- a/crates/mail-mcp/src/config.rs +++ b/crates/mail-mcp/src/config.rs @@ -117,11 +117,20 @@ impl Account { } } +/// Strip ONE matching pair of leading+trailing quote chars, never a half. +/// +/// `"foo"` -> `foo`; `'foo'` -> `foo`. `"foo` -> `"foo` (unchanged — a +/// password with a leading `"` and no trailing close stays intact). +/// Mixed quotes (`"foo'`) and inputs shorter than two bytes stay untouched. fn strip_quotes(s: &str) -> &str { - let s = s.strip_prefix('"').unwrap_or(s); - let s = s.strip_suffix('"').unwrap_or(s); - let s = s.strip_prefix('\'').unwrap_or(s); - s.strip_suffix('\'').unwrap_or(s) + if s.len() >= 2 { + let first = s.as_bytes()[0]; + let last = s.as_bytes()[s.len() - 1]; + if (first == b'"' || first == b'\'') && first == last { + return &s[1..s.len() - 1]; + } + } + s } fn config_path() -> Result { diff --git a/crates/mail-mcp/src/imap.rs b/crates/mail-mcp/src/imap.rs index a50c8a5..1286299 100644 --- a/crates/mail-mcp/src/imap.rs +++ b/crates/mail-mcp/src/imap.rs @@ -74,7 +74,35 @@ pub struct FolderEntry { const DEFAULT_LIMIT: u32 = 50; const MAX_LIMIT: u32 = 500; -const SNIPPET_LEN: usize = 240; +/// Cap on message size in `mail_inbox_read`, checked against RFC822.SIZE +/// before the body is fetched. Anything larger refuses the read for every +/// `format` (text, html and raw_eml alike), despite the constant's name. +const MAX_RAW_EML_BYTES: u64 = 20 * 1024 * 1024; + +/// Mailbox-name guard: reject any byte that would force us into IMAP +/// quoted-string or literal-form territory we don't control. CR/LF/NUL +/// could split commands; `\`/`"` would need escaping that async-imap's +/// `uid_copy` does not perform on the destination argument. We refuse +/// the small set rather than try to quote-and-escape on the wire. 
+fn validate_mailbox(name: &str) -> Result<()> { + if name.is_empty() { + return Err(anyhow!("mailbox name cannot be empty")); + } + for b in name.bytes() { /* byte-wise scan suffices: every rejected value is ASCII, and ASCII bytes never appear inside a multi-byte UTF-8 sequence */ + match b { + b'\r' | b'\n' | 0 => { + return Err(anyhow!("mailbox name contains CR/LF/NUL")); + } + b'"' | b'\\' => { + return Err(anyhow!( + "mailbox name contains `\"` or `\\` — refused; rename the folder server-side if needed" + )); + } + _ => {} + } + } + Ok(()) +} // ============================================================================= // list @@ -82,6 +110,7 @@ const SNIPPET_LEN: usize = 240; pub async fn list(account: &Account, opts: ListOpts) -> Result> { let folder = opts.folder.as_deref().unwrap_or("INBOX"); + validate_mailbox(folder)?; let limit = match opts.limit { 0 => DEFAULT_LIMIT, n if n > MAX_LIMIT => MAX_LIMIT, @@ -209,6 +238,7 @@ pub async fn read( format: &str, ) -> Result { let folder = folder.unwrap_or("INBOX"); + validate_mailbox(folder)?; let format = match format { "text" | "html" | "raw_eml" => format, other => { @@ -224,6 +254,30 @@ pub async fn read( .await .with_context(|| format!("SELECT {folder}"))?; + // Pre-flight size check via RFC822.SIZE so we can refuse oversized fetches + // before we transfer the body. Mail-parser allocates O(message size); a + // 1 GB MIME message would OOM us. + { + let mut size_stream = session + .uid_fetch(uid.to_string(), "(UID RFC822.SIZE)") + .await + .with_context(|| format!("UID FETCH SIZE {uid}"))?; + let size_msg = size_stream + .next() + .await + .ok_or_else(|| anyhow!("no message at UID {uid} in {folder}"))? + .context("UID FETCH SIZE stream")?; + let size = size_msg.size.unwrap_or(0) as u64; // NOTE(review): a reply without RFC822.SIZE counts as 0 bytes and passes the cap — confirm that is intended + drop(size_stream); + if size > MAX_RAW_EML_BYTES { + session.logout().await.ok(); + return Err(anyhow!( + "message UID {uid} is {size} bytes — refusing to fetch (cap is {MAX_RAW_EML_BYTES}). \ + Use a more specific tool when we add partial-fetch in Phase C." + )); + } + } + // BODY[] = full RFC822 message. 
We parse with mail-parser, then either // return the text part, html part, or raw. let mut stream = session @@ -277,17 +331,26 @@ pub async fn read( }) .collect(); - // Headers as a flat JSON map (last-write-wins on duplicates is fine for v0.1). + // Headers as a flat JSON map. mail-parser's `HeaderValue::as_text()` + // returns None for structured variants (Address / DateTime / ContentType + // / Received) which would leave most "interesting" headers empty. We use + // `Message::header_raw(name)` instead — that returns the un-decoded + // header value as &str, uniform across all header types. let mut headers = serde_json::Map::new(); + let mut seen = std::collections::HashSet::new(); for h in parsed.headers() { - let name = h.name(); - let val = h.value().as_text().map(|s| s.to_string()).unwrap_or_default(); - headers.insert(name.to_string(), serde_json::Value::String(val)); + let name = h.name().to_string(); + if !seen.insert(name.clone()) { + continue; // duplicate header — keep the first occurrence + } + let raw = parsed + .header_raw(name.as_str()) + .map(|s| s.trim().to_string()) + .unwrap_or_default(); + headers.insert(name, serde_json::Value::String(raw)); } let subject = parsed.subject().unwrap_or_default().to_string(); - let snippet_unused: String = body.chars().take(SNIPPET_LEN).collect(); - let _ = snippet_unused; // suppress unused (kept structure-wise for symmetry) Ok(ReadOutput { uid, @@ -357,7 +420,17 @@ pub async fn search( if query.contains('\r') || query.contains('\n') { return Err(anyhow!("search query must not contain CR or LF")); } + // Reject IMAP literal-form `{N}` / `{N+}` patterns. These would let a + // caller switch the wire parser into literal mode and consume the + // following bytes raw — not a CRLF-bounded injection but still a + // command-shape surprise we don't want to enable. 
+ if has_imap_literal(query) { + return Err(anyhow!( + "search query contains IMAP literal-form `{{N}}` syntax — refused" + )); + } let folder = folder.unwrap_or("INBOX"); + validate_mailbox(folder)?; let limit = match limit { 0 => DEFAULT_LIMIT, n if n > MAX_LIMIT => MAX_LIMIT, @@ -400,10 +473,18 @@ pub async fn thread( if id_unbraced.is_empty() { return Err(anyhow!("message_id is empty")); } - if id_unbraced.contains('"') || id_unbraced.contains('\r') || id_unbraced.contains('\n') { - return Err(anyhow!("message_id must not contain quotes, CR, or LF")); + // Reject `"` (IMAP quoted-string terminator), `\` (IMAP quoted-string + // escape), CR/LF (command terminators), and `{` (literal-form opener). + if id_unbraced + .chars() + .any(|c| matches!(c, '"' | '\\' | '\r' | '\n' | '{')) + { + return Err(anyhow!( + r#"message_id must not contain ", \, CR, LF, or {{"# + )); } let folder = folder.unwrap_or("INBOX"); + validate_mailbox(folder)?; let limit = match limit { 0 => DEFAULT_LIMIT, n if n > MAX_LIMIT => MAX_LIMIT, @@ -460,12 +541,8 @@ pub async fn move_msg( to_folder: &str, ) -> Result<()> { let from = from_folder.unwrap_or("INBOX"); - if to_folder.is_empty() { - return Err(anyhow!("to_folder cannot be empty")); - } - if to_folder.contains('\r') || to_folder.contains('\n') { - return Err(anyhow!("to_folder must not contain CR or LF")); - } + validate_mailbox(from)?; + validate_mailbox(to_folder)?; let mut session = open_session(account).await?; session @@ -594,6 +671,11 @@ async fn open_session( "plain-IMAP (no TLS) not supported in v0.1 — set imap_tls=true and imap_port=993" )); } + // Resolve the password BEFORE the TCP+TLS handshake. If the credential + // is missing or unreadable we want the error before the server logs an + // unauthenticated session (which fail2ban could rate-limit on). 
+ let password = account.resolve_password()?; + let addr = format!("{}:{}", account.imap_host, account.imap_port); let tcp = TcpStream::connect(&addr) .await @@ -614,12 +696,28 @@ async fn open_session( let client = async_imap::Client::new(tls); // greeting was consumed by Client::new in async-imap >= 0.10 let session = client - .login(&account.username, account.resolve_password()?) + .login(&account.username, password) .await .map_err(|(e, _client)| anyhow!("imap login failed: {e}"))?; Ok(session) } +/// Detect the IMAP literal-form `{N}` / `{N+}` / `{N+}\r\n` opener pattern. +/// We don't try to validate fully — any `{` immediately followed by an ASCII +/// digit is enough signal to refuse a user-supplied search query. +fn has_imap_literal(s: &str) -> bool { + let mut chars = s.chars().peekable(); + while let Some(c) = chars.next() { + if c == '{' { + // The next char must be an ASCII digit for it to be a literal opener. + if matches!(chars.peek(), Some(d) if d.is_ascii_digit()) { + return true; + } + } + } + false +} + fn rustls_roots() -> rustls::RootCertStore { let mut roots = rustls::RootCertStore::empty(); roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); diff --git a/crates/mail-mcp/src/smtp.rs b/crates/mail-mcp/src/smtp.rs index f827f43..a8362f3 100644 --- a/crates/mail-mcp/src/smtp.rs +++ b/crates/mail-mcp/src/smtp.rs @@ -52,10 +52,58 @@ pub struct SendOutput { const USER_AGENT: &str = concat!("mail-mcp/", env!("CARGO_PKG_VERSION")); +/// Hard caps to keep an MCP-driver-gone-wrong from OOM-ing the box. +/// Match Gmail's effective 25 MB per-message ceiling — any single attachment +/// past this is almost certainly unintended. Body caps are generous; HTML +/// bodies past 5 MB are a smell. 
+const MAX_ATTACHMENT_BYTES: usize = 25 * 1024 * 1024; +const MAX_ATTACHMENTS: usize = 25; +const MAX_BODY_BYTES: usize = 5 * 1024 * 1024; +const MAX_TOTAL_RECIPIENTS: usize = 100; + pub async fn send(account: &Account, input: SendInput) -> Result { if input.to.is_empty() { return Err(anyhow!("at least one `to` address required")); } + let total_recipients = input.to.len() + input.cc.len() + input.bcc.len(); + if total_recipients > MAX_TOTAL_RECIPIENTS { + return Err(anyhow!( + "too many recipients: {total_recipients} (cap {MAX_TOTAL_RECIPIENTS})" + )); + } + if input.body.len() > MAX_BODY_BYTES { + return Err(anyhow!( + "body too large: {} bytes (cap {MAX_BODY_BYTES})", + input.body.len() + )); + } + if let Some(html) = &input.body_html { + if html.len() > MAX_BODY_BYTES { + return Err(anyhow!( + "body_html too large: {} bytes (cap {MAX_BODY_BYTES})", + html.len() + )); + } + } + if input.attachments.len() > MAX_ATTACHMENTS { + return Err(anyhow!( + "too many attachments: {} (cap {MAX_ATTACHMENTS})", + input.attachments.len() + )); + } + // Coarse pre-decode size estimate — base64 expands the payload ~33%, + // so the encoded string is ~4*N/3 of the decoded bytes. Refuse early + // if the encoded form alone exceeds 4/3 of MAX, saving a giant decode. NOTE(review): padded base64 of exactly MAX bytes is 4*ceil(MAX/3) = 34_952_536 chars, 2 above this bound (34_952_534), so a cap-sized attachment is refused here — confirm, or use MAX_ATTACHMENT_BYTES.div_ceil(3) * 4. + let max_encoded = (MAX_ATTACHMENT_BYTES * 4 + 2) / 3; + for att in &input.attachments { + if att.content_base64.len() > max_encoded { + return Err(anyhow!( + "attachment `{}` encoded length {} exceeds limit (decoded cap {MAX_ATTACHMENT_BYTES})", + att.filename, + att.content_base64.len() + )); + } + } // Build From, To, Cc, Bcc. 
let from_str = format!("{} <{}>", account.from_name, account.from_addr); @@ -173,6 +221,13 @@ fn build_body(input: &SendInput) -> Result { let bytes = base64::engine::general_purpose::STANDARD .decode(&att.content_base64) .with_context(|| format!("attachment `{}`: invalid base64", att.filename))?; + if bytes.len() > MAX_ATTACHMENT_BYTES { + return Err(anyhow!( + "attachment `{}` decoded to {} bytes — exceeds cap {MAX_ATTACHMENT_BYTES}", + att.filename, + bytes.len() + )); + } let content_type: ContentType = att.mime_type.parse().with_context(|| { format!( "attachment `{}`: invalid mime_type `{}`", diff --git a/crates/mail-mcp/src/tools.rs b/crates/mail-mcp/src/tools.rs index e4e39c1..97727c9 100644 --- a/crates/mail-mcp/src/tools.rs +++ b/crates/mail-mcp/src/tools.rs @@ -280,7 +280,7 @@ impl MailService { #[tool( name = "mail_search", - description = "Raw IMAP SEARCH passthrough against a folder (default INBOX). Examples: `SUBJECT \"invoice\"`, `FROM \"cobb@sulkta.com\"`, `SINCE 21-May-2026 UNSEEN`, `OR SUBJECT \"foo\" SUBJECT \"bar\"`. CR/LF in the query are rejected (anti-injection). Returns the same summary shape as mail_inbox_list, newest UID first." + description = "Raw IMAP SEARCH passthrough against a folder (default INBOX). Examples: `SUBJECT \"invoice\"`, `FROM \"cobb@sulkta.com\"`, `SINCE 21-May-2026 UNSEEN`, `OR SUBJECT \"foo\" SUBJECT \"bar\"`. CR/LF and IMAP `{N}` literal-form are rejected, but the query is otherwise passed raw — do not pass untrusted input (an unbalanced `\"` can change which UIDs match). Returns the same summary shape as mail_inbox_list, newest UID first." )] async fn mail_search( &self, @@ -388,10 +388,14 @@ impl ServerHandler for MailService { ServerInfo { capabilities: ServerCapabilities::builder().enable_tools().build(), instructions: Some( - "mail-mcp — Rust MCP server for Sulkta-hosted email. \ - Tools: mail_send, mail_inbox_list, mail_inbox_read. \ - Default account from config; pass `account` to switch. 
\ - Reads use BODY.PEEK so they don't toggle the \\Seen flag." + "mail-mcp — Rust MCP server for Sulkta-hosted email. Tools: \ + mail_send, mail_inbox_list, mail_inbox_read, mail_folder_list, \ + mail_search, mail_thread, mail_move. Default account from \ + config; pass `account` to switch. Reads use BODY.PEEK so they \ + don't toggle the \\Seen flag. UID is stable across SELECT; \ + sequence numbers are not — always address by UID. mail_search \ + takes a raw IMAP SEARCH query; mail_thread walks the \ + References + In-Reply-To chain." .into(), ), ..Default::default()