Round-2 cruft audit punch list — mechanical deletes, no behavior change.
Whole modules deleted (no wrapper consumer):
* youtube/playlist_extractor.rs (297 LOC) — full playlist extraction
* youtube/linkhandler/playlist.rs (81 LOC) — playlist URL parser
* youtube/suggestion_extractor.rs (91 LOC) — search-as-you-type
* tests/stream_phase4_offline.rs (186 LOC) — tautological test
Dead pub fns + enum variants + constants:
* WEB_REMIX_* constants (3) + WEB_MUSIC_ANALYTICS_* constants (3)
* InnertubeClientRequestInfo::of_web_music_analytics_charts_client
factory + its charts_client_omits_platform_and_screen test
* SearchFilter::Music{Songs,Videos,Albums,Playlists,Artists} variants
(5 of 9 cases) + uses_music_endpoint helper + the search_extractor
'music search not implemented' reject branch
* Two #[allow(dead_code)] _suppress_unused stub fns and the imports
they were keeping alive (std::sync::Arc in js/extractor.rs,
NetworkError in stream_extractor.rs)
Renamed:
* search_extractor::test_helpers -> renderer_helpers. Mis-named:
it's production code called from channel.rs, not a test fixture.
potoken/ is kept and documented as the designed Phase-5 extension point
for YouTube bot-detection. The wrapper's Android side hasn't registered
a real provider yet, but the trait and global slot stay in place so that,
when YouTube forces po_token universally, the integration is one Kotlin
patch away rather than a Rust-side rewrite.
~580 LOC removed from production. Wrapper does not need to change.
189 lines
6.8 KiB
Rust
189 lines
6.8 KiB
Rust
// player.js URL discovery + download. Mirrors NPE
|
|
// services/youtube/YoutubeJavaScriptExtractor.java.
|
|
//
|
|
// Two discovery paths, in order:
|
|
// 1. iframe_api regex (primary)
|
|
// 2. embed/<videoId> page — Jsoup script-tag walk + jsUrl regex fallback
|
|
//
|
|
// PARITY: we deliberately reproduce NPE's bug where `select("script")
|
|
// .attr("name", "player/base")` *mutates* the script tags and iterates ALL
|
|
// of them. The intent was "find the script with name=player/base" but
|
|
// Jsoup's attr-setter doesn't filter. Our walk does the same — iterate
|
|
// every script tag, return first whose `src` contains `base.js`.
|
|
|
|
use once_cell::sync::Lazy;
|
|
use regex::Regex;
|
|
|
|
use crate::downloader::request::Request;
|
|
use crate::downloader::Downloader;
|
|
use crate::localization::Localization;
|
|
use crate::newpipe::NewPipe;
|
|
use crate::youtube::js::DeobfError;
|
|
|
|
const IFRAME_API_URL: &str = "https://www.youtube.com/iframe_api";
|
|
const BASE_JS_PLAYER_URL_FORMAT: &str =
|
|
"https://www.youtube.com/s/player/{HASH}/player_ias.vflset/en_GB/base.js";
|
|
|
|
static IFRAME_RES_JS_BASE_PLAYER_HASH: Lazy<Regex> =
|
|
Lazy::new(|| Regex::new(r"player\\/([a-z0-9]{8})\\/").unwrap());
|
|
|
|
static EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL: Lazy<Regex> = Lazy::new(|| {
|
|
Regex::new(
|
|
r#""jsUrl":"(/s/player/[A-Za-z0-9]+/player_ias\.vflset/[A-Za-z_-]+/base\.js)""#,
|
|
)
|
|
.unwrap()
|
|
});
|
|
|
|
static SCRIPT_TAG: Lazy<Regex> =
|
|
Lazy::new(|| Regex::new(r#"<script[^>]*\bsrc=["']([^"']+)["'][^>]*>"#).unwrap());
|
|
|
|
/// Extracts the player.js URL + body. Tries iframe_api first, falls back
|
|
/// to the embed page on any failure (matches NPE's try/catch flow).
|
|
pub fn extract_javascript_player_code(video_id: &str) -> Result<(String, String), DeobfError> {
|
|
let downloader = NewPipe::downloader().ok_or(DeobfError::DownloaderMissing)?;
|
|
|
|
let url = match extract_from_iframe(&*downloader) {
|
|
Ok(u) => u,
|
|
Err(_iframe_err) => extract_from_embed(&*downloader, video_id)?,
|
|
};
|
|
let cleaned = clean_javascript_url(&url)?;
|
|
let body = download_javascript_code(&*downloader, &cleaned)?;
|
|
Ok((cleaned, body))
|
|
}
|
|
|
|
fn extract_from_iframe(downloader: &dyn Downloader) -> Result<String, DeobfError> {
|
|
let req = Request::get(IFRAME_API_URL)
|
|
.localization(Some(Localization::default()))
|
|
.build();
|
|
let resp = downloader
|
|
.execute(req)
|
|
.map_err(|e| DeobfError::FetchIframe(e.to_string()))?;
|
|
let body = resp.response_body();
|
|
let hash = IFRAME_RES_JS_BASE_PLAYER_HASH
|
|
.captures(body)
|
|
.and_then(|c| c.get(1))
|
|
.ok_or(DeobfError::PlayerUrlMissing)?
|
|
.as_str();
|
|
Ok(BASE_JS_PLAYER_URL_FORMAT.replace("{HASH}", hash))
|
|
}
|
|
|
|
fn extract_from_embed(downloader: &dyn Downloader, video_id: &str) -> Result<String, DeobfError> {
|
|
let embed_url = format!("https://www.youtube.com/embed/{video_id}");
|
|
let req = Request::get(&embed_url)
|
|
.localization(Some(Localization::default()))
|
|
.build();
|
|
let resp = downloader
|
|
.execute(req)
|
|
.map_err(|e| DeobfError::FetchEmbed(e.to_string()))?;
|
|
let body = resp.response_body();
|
|
|
|
// PARITY: NPE iterates every <script> tag (the `.attr("name","player/base")`
|
|
// call sets an attribute rather than filtering). We do the same.
|
|
for caps in SCRIPT_TAG.captures_iter(body) {
|
|
if let Some(src) = caps.get(1) {
|
|
let src = src.as_str();
|
|
if src.contains("base.js") {
|
|
return Ok(src.to_string());
|
|
}
|
|
}
|
|
}
|
|
|
|
// Regex fallback.
|
|
if let Some(c) = EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL.captures(body) {
|
|
if let Some(m) = c.get(1) {
|
|
return Ok(m.as_str().to_string());
|
|
}
|
|
}
|
|
|
|
Err(DeobfError::PlayerUrlMissing)
|
|
}
|
|
|
|
fn clean_javascript_url(url: &str) -> Result<String, DeobfError> {
|
|
let normalized = if let Some(rest) = url.strip_prefix("//") {
|
|
format!("https://{rest}")
|
|
} else if url.starts_with('/') {
|
|
format!("https://www.youtube.com{url}")
|
|
} else {
|
|
url.to_string()
|
|
};
|
|
url::Url::parse(&normalized).map_err(|e| DeobfError::InvalidPlayerUrl(e.to_string()))?;
|
|
Ok(normalized)
|
|
}
|
|
|
|
fn download_javascript_code(downloader: &dyn Downloader, url: &str) -> Result<String, DeobfError> {
|
|
let req = Request::get(url)
|
|
.localization(Some(Localization::default()))
|
|
.build();
|
|
let resp = downloader
|
|
.execute(req)
|
|
.map_err(|e| DeobfError::FetchPlayerCode(e.to_string()))?;
|
|
if resp.response_code() != 200 {
|
|
return Err(DeobfError::FetchPlayerCode(format!(
|
|
"HTTP {}",
|
|
resp.response_code()
|
|
)));
|
|
}
|
|
Ok(resp.response_body().to_string())
|
|
}
|
|
|
|
/// Extracts the 8-char player hash from a URL like
|
|
/// `https://www.youtube.com/s/player/<hash>/player_ias.vflset/.../base.js`.
|
|
/// Used for rotation detection.
|
|
pub fn extract_player_hash(url: &str) -> Option<String> {
|
|
static RE: Lazy<Regex> =
|
|
Lazy::new(|| Regex::new(r"/s/player/([A-Za-z0-9]{8})/").unwrap());
|
|
RE.captures(url).and_then(|c| c.get(1)).map(|m| m.as_str().to_string())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns capture group 1 of the first match of `re` in `haystack`,
    /// panicking when there is no match.
    fn first_group<'h>(re: &Regex, haystack: &'h str) -> &'h str {
        re.captures(haystack)
            .and_then(|caps| caps.get(1))
            .map(|m| m.as_str())
            .expect("pattern should match and capture group 1")
    }

    #[test]
    fn iframe_hash_regex_matches_escaped_form() {
        let sample = r#"src:"https://www.youtube.com/s/player\/c2f7551f\/player_ias.vflset/en_US/www-embed.js""#;
        assert_eq!(
            first_group(&IFRAME_RES_JS_BASE_PLAYER_HASH, sample),
            "c2f7551f"
        );
    }

    #[test]
    fn embedded_js_url_regex_matches() {
        let sample = r#"...,"jsUrl":"/s/player/abcdef12/player_ias.vflset/en_GB/base.js",..."#;
        assert_eq!(
            first_group(&EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL, sample),
            "/s/player/abcdef12/player_ias.vflset/en_GB/base.js"
        );
    }

    #[test]
    fn script_tag_regex_finds_src() {
        let html = r#"<html><body><script src="//foo.com/base.js" name="x"></script></body></html>"#;
        assert_eq!(first_group(&SCRIPT_TAG, html), "//foo.com/base.js");
    }

    #[test]
    fn clean_url_promotes_protocol_relative() {
        let cleaned = clean_javascript_url("//www.youtube.com/foo/base.js").unwrap();
        assert_eq!(cleaned, "https://www.youtube.com/foo/base.js");
    }

    #[test]
    fn clean_url_prefixes_youtube_for_absolute_path() {
        let cleaned = clean_javascript_url("/s/player/abc/base.js").unwrap();
        assert_eq!(cleaned, "https://www.youtube.com/s/player/abc/base.js");
    }

    #[test]
    fn clean_url_passes_through_full() {
        let input = "https://www.youtube.com/s/player/x/base.js";
        let cleaned = clean_javascript_url(input).unwrap();
        assert_eq!(cleaned, "https://www.youtube.com/s/player/x/base.js");
    }

    #[test]
    fn player_hash_extracted_from_url() {
        let hash = extract_player_hash(
            "https://www.youtube.com/s/player/c2f7551f/player_ias.vflset/en_GB/base.js",
        );
        assert_eq!(hash.as_deref(), Some("c2f7551f"));
    }
}
|