Cap attribution_link recursion + Downloader response body size

Two adversarial bugs surfaced by the round-2 audit on this crate.

extract_video_id recursion (linkhandler/stream.rs)
  /attribution_link?u=<inner> recursed on the inner URL with no depth
  guard. The comment claimed 'only one level deep' but the call was
  plain recursion — a pasted URL whose u= param decodes to another
  /attribution_link would recurse until the JVM stack blew. Wrap the
  recursion in extract_video_id_inner with an explicit depth counter
  capped at MAX_ATTRIBUTION_DEPTH = 1.

ReqwestDownloader body cap (downloader/default_impl.rs)
  resp.text() read the entire response body into a String with no
  upper bound. Player.js is ~1.5 MB, watch HTML ~3 MB, channel
  responses well under 1 MB. A hostile redirect target (or compromised
  host) could blast multi-GB and OOM-kill the Android process — there
  is no headroom on a 1 GB JVM heap ceiling.

  Cap at 32 MB. Two-stage check: bail fast on a known Content-Length
  that exceeds the cap, and use Read::take(MAX+1) on the stream so we
  detect overrun rather than silently truncate. Switched the final
  decode to from_utf8_lossy so a single mojibake byte doesn't drop the
  whole response (same fix shape as the wrapper's read_capped_body).
This commit is contained in:
Kayos 2026-05-26 22:02:40 -07:00
parent 7d2e4c51b8
commit bfd06d1ef3
2 changed files with 50 additions and 3 deletions

View file

@ -8,6 +8,7 @@
// * HTTP 429 → NetworkError::Recaptcha; all other status codes surface
// as Ok(Response)
use std::io::Read;
use std::time::Duration;
use reqwest::blocking::Client;
@ -22,6 +23,12 @@ const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
const MAX_REDIRECTS: usize = 10;
const USER_AGENT: &str =
"Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Mobile Safari/537.36";
/// Hard cap on response body size. Real YT player.js is ~1.5 MB,
/// InnerTube watch-page HTML is ~3 MB, channel/playlist responses
/// are well under 1 MB. 32 MB leaves a generous margin while
/// blocking a hostile / compromised host from blasting GB-scale
/// bodies into the JVM and OOM-killing the process.
const MAX_BODY_BYTES: u64 = 32 * 1024 * 1024;
pub struct ReqwestDownloader {
client: Client,
@ -90,7 +97,30 @@ impl Downloader for ReqwestDownloader {
headers.entry(key).or_default().push(val);
}
let body = resp.text()?;
// Fail fast when a known Content-Length already exceeds the cap.
if let Some(len) = resp.content_length() {
if len > MAX_BODY_BYTES {
return Err(NetworkError::Transport(format!(
"response body {len} bytes exceeds cap {MAX_BODY_BYTES}"
)));
}
}
// Streaming read with hard cap. take(N+1) so we can detect
// "exceeded the cap" rather than "exactly at the cap, maybe
// more truncated" — if read_to_end fills MAX+1 bytes we know
// the upstream had more.
let mut buf: Vec<u8> = Vec::new();
let mut limited = resp.take(MAX_BODY_BYTES + 1);
limited
.read_to_end(&mut buf)
.map_err(|e| NetworkError::Transport(format!("body read: {e}")))?;
if buf.len() as u64 > MAX_BODY_BYTES {
return Err(NetworkError::Transport(format!(
"response body exceeded cap {MAX_BODY_BYTES}"
)));
}
let body = String::from_utf8_lossy(&buf).into_owned();
Ok(Response::new(code, message, headers, body, url_after_redirects))
}

View file

@ -30,6 +30,18 @@ pub fn is_valid_video_id(id: &str) -> bool {
/// the URL doesn't look like a YT video URL (so search results / channel
/// pages return None rather than Err — caller decides).
pub fn extract_video_id(input_url: &str) -> Result<String, LinkError> {
extract_video_id_inner(input_url, 0)
}
/// Internal recursion helper. `depth` is bumped on every
/// /attribution_link?u=... re-entry so a maliciously-nested
/// attribution chain can't blow the stack.
fn extract_video_id_inner(input_url: &str, depth: u8) -> Result<String, LinkError> {
// Cap attribution-link recursion. Real YT never nests these; a
// pasted-URL DoS that recurses ~10k levels would stack-overflow
// the JVM via UniFFI. One level is enough for the legitimate
// share-from-attribution-app case.
const MAX_ATTRIBUTION_DEPTH: u8 = 1;
let url = Url::parse(input_url)
.map_err(|e| LinkError::InvalidUrl(format!("{input_url}: {e}")))?;
let host = url
@ -68,10 +80,15 @@ pub fn extract_video_id(input_url: &str) -> Result<String, LinkError> {
// /attribution_link?u=<encoded watch url>
if candidate.is_none() && path.starts_with("/attribution_link") {
if depth >= MAX_ATTRIBUTION_DEPTH {
return Err(LinkError::InvalidUrl(
"attribution_link recursion exceeded cap".into(),
));
}
if let Some((_, u_param)) = url.query_pairs().find(|(k, _)| k == "u") {
// Recurse on the decoded URL — but only one level deep.
// Recurse on the decoded URL with depth bump.
let inner = format!("https://www.youtube.com{u_param}");
return extract_video_id(&inner);
return extract_video_id_inner(&inner, depth + 1);
}
}