Cap attribution_link recursion + Downloader response body size
Two adversarial bugs surfaced by the round-2 audit on this crate. extract_video_id recursion (linkhandler/stream.rs) /attribution_link?u=<inner> recursed on the inner URL with no depth guard. The comment claimed 'only one level deep' but the call was plain recursion — a pasted URL whose u= param decodes to another /attribution_link would recurse until the JVM stack blew. Wrap the recursion in extract_video_id_inner with an explicit depth counter capped at MAX_ATTRIBUTION_DEPTH = 1. ReqwestDownloader body cap (downloader/default_impl.rs) resp.text() read the entire response body into a String with no upper bound. Player.js is ~1.5 MB, watch HTML ~3 MB, channel responses well under 1 MB. A hostile redirect target (or compromised host) could blast multi-GB and OOM-kill the Android process — there is no headroom on a 1 GB JVM heap ceiling. Cap at 32 MB. Two-stage check: bail fast on a known Content-Length that exceeds the cap, and use Read::take(MAX+1) on the stream so we detect overrun rather than silently truncate. Switched the final decode to from_utf8_lossy so a single mojibake byte doesn't drop the whole response (same fix shape as the wrapper's read_capped_body).
This commit is contained in:
parent
7d2e4c51b8
commit
bfd06d1ef3
2 changed files with 50 additions and 3 deletions
|
|
@ -8,6 +8,7 @@
|
|||
// * HTTP 429 → NetworkError::Recaptcha; all other status codes surface
|
||||
// as Ok(Response)
|
||||
|
||||
use std::io::Read;
|
||||
use std::time::Duration;
|
||||
|
||||
use reqwest::blocking::Client;
|
||||
|
|
@ -22,6 +23,12 @@ const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
|
|||
const MAX_REDIRECTS: usize = 10;
|
||||
const USER_AGENT: &str =
|
||||
"Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Mobile Safari/537.36";
|
||||
/// Hard cap on response body size. Real YT player.js is ~1.5 MB,
|
||||
/// InnerTube watch-page HTML is ~3 MB, channel/playlist responses
|
||||
/// are well under 1 MB. 32 MB leaves a generous margin while
|
||||
/// blocking a hostile / compromised host from blasting GB-scale
|
||||
/// bodies into the JVM and OOM-killing the process.
|
||||
const MAX_BODY_BYTES: u64 = 32 * 1024 * 1024;
|
||||
|
||||
pub struct ReqwestDownloader {
|
||||
client: Client,
|
||||
|
|
@ -90,7 +97,30 @@ impl Downloader for ReqwestDownloader {
|
|||
headers.entry(key).or_default().push(val);
|
||||
}
|
||||
|
||||
let body = resp.text()?;
|
||||
// Fail fast when a known Content-Length already exceeds the cap.
|
||||
if let Some(len) = resp.content_length() {
|
||||
if len > MAX_BODY_BYTES {
|
||||
return Err(NetworkError::Transport(format!(
|
||||
"response body {len} bytes exceeds cap {MAX_BODY_BYTES}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Streaming read with hard cap. take(N+1) so we can detect
|
||||
// "exceeded the cap" rather than "exactly at the cap, maybe
|
||||
// more truncated" — if read_to_end fills MAX+1 bytes we know
|
||||
// the upstream had more.
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
let mut limited = resp.take(MAX_BODY_BYTES + 1);
|
||||
limited
|
||||
.read_to_end(&mut buf)
|
||||
.map_err(|e| NetworkError::Transport(format!("body read: {e}")))?;
|
||||
if buf.len() as u64 > MAX_BODY_BYTES {
|
||||
return Err(NetworkError::Transport(format!(
|
||||
"response body exceeded cap {MAX_BODY_BYTES}"
|
||||
)));
|
||||
}
|
||||
let body = String::from_utf8_lossy(&buf).into_owned();
|
||||
|
||||
Ok(Response::new(code, message, headers, body, url_after_redirects))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,18 @@ pub fn is_valid_video_id(id: &str) -> bool {
|
|||
/// the URL doesn't look like a YT video URL (so search results / channel
|
||||
/// pages return None rather than Err — caller decides).
|
||||
pub fn extract_video_id(input_url: &str) -> Result<String, LinkError> {
|
||||
extract_video_id_inner(input_url, 0)
|
||||
}
|
||||
|
||||
/// Internal recursion helper. `depth` is bumped on every
|
||||
/// /attribution_link?u=... re-entry so a maliciously-nested
|
||||
/// attribution chain can't blow the stack.
|
||||
fn extract_video_id_inner(input_url: &str, depth: u8) -> Result<String, LinkError> {
|
||||
// Cap attribution-link recursion. Real YT never nests these; a
|
||||
// pasted-URL DoS that recurses ~10k levels would stack-overflow
|
||||
// the JVM via UniFFI. One level is enough for the legitimate
|
||||
// share-from-attribution-app case.
|
||||
const MAX_ATTRIBUTION_DEPTH: u8 = 1;
|
||||
let url = Url::parse(input_url)
|
||||
.map_err(|e| LinkError::InvalidUrl(format!("{input_url}: {e}")))?;
|
||||
let host = url
|
||||
|
|
@ -68,10 +80,15 @@ pub fn extract_video_id(input_url: &str) -> Result<String, LinkError> {
|
|||
|
||||
// /attribution_link?u=<encoded watch url>
|
||||
if candidate.is_none() && path.starts_with("/attribution_link") {
|
||||
if depth >= MAX_ATTRIBUTION_DEPTH {
|
||||
return Err(LinkError::InvalidUrl(
|
||||
"attribution_link recursion exceeded cap".into(),
|
||||
));
|
||||
}
|
||||
if let Some((_, u_param)) = url.query_pairs().find(|(k, _)| k == "u") {
|
||||
// Recurse on the decoded URL — but only one level deep.
|
||||
// Recurse on the decoded URL with depth bump.
|
||||
let inner = format!("https://www.youtube.com{u_param}");
|
||||
return extract_video_id(&inner);
|
||||
return extract_video_id_inner(&inner, depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue