From 3014410cba7c5ef6c50cee6a2562dd759a2e1cde Mon Sep 17 00:00:00 2001 From: Kayos Date: Sun, 24 May 2026 16:57:47 -0700 Subject: [PATCH] =?UTF-8?q?Phase=203=20=E2=80=94=20InnerTube=20+=20itag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port the YT client matrix + request envelope + itag lookup table. src/youtube/ * constants.rs — ClientsConstants.java verbatim. All six live clients (WEB, WEB_EMBEDDED_PLAYER, WEB_MUSIC_ANALYTICS, ANDROID, IOS, plus the WEB_REMIX values for completeness). Base URLs + prettyPrint=false suffix. * client_request.rs — ClientInfo / DeviceInfo / InnertubeClientRequestInfo + the 5 factory constructors NPE exposes (ofWebClient, ofWebEmbeddedPlayer, ofCharts, ofAndroid, ofIos). build_envelope() emits the InnerTube JSON in NPE's exact insertion order; build_desktop_envelope() is the WEB-fast-path used by search/browse/next/resolve_url/comments. * itag.rs — 57-entry itag table (14 progressive + 10 audio + 33 video-only). MediaFormat enum + ItagType enum + ItagItem struct + lookup(). * parsing.rs — consent toggle + cookie generator (SOCS=CAE= / SOCS=CAISAiAD), WEB client-version cache + sw.js scrape, WEB/mobile header builders (mobile deliberately strips X-YouTube-Client-Name + Origin/Referer + Cookie per audit Track A §6.2), android/ios UA templates, visitor_data bootstrap POST to /youtubei/v1/visitor_id. PARITY notes flagged in code: * androidSdkVersion=36 + osVersion=16 but Android-15 in UA — NPE-intentional * mobile clients send NO X-YouTube-Client-* headers * audit doc says "53 entries" but tallies + NPE source = 57 ItagItems Tests: 62 lib unit pass (up from 43 in Phase 2). All Phase 1 + Phase 2 smoke still green. Live InnerTube POSTs (visitor_data bootstrap + /player) deferred to Phase 4 integration. 
--- src/youtube/client_request.rs | 351 ++++++++++++++++++++++++++++++++++ src/youtube/constants.rs | 53 +++++ src/youtube/itag.rs | 218 +++++++++++++++++++++ src/youtube/mod.rs | 12 +- src/youtube/parsing.rs | 232 ++++++++++++++++++++++ 5 files changed, 863 insertions(+), 3 deletions(-) create mode 100644 src/youtube/client_request.rs create mode 100644 src/youtube/constants.rs create mode 100644 src/youtube/itag.rs create mode 100644 src/youtube/parsing.rs diff --git a/src/youtube/client_request.rs b/src/youtube/client_request.rs new file mode 100644 index 0000000..96f3226 --- /dev/null +++ b/src/youtube/client_request.rs @@ -0,0 +1,351 @@ +// InnertubeClientRequestInfo + JSON envelope builder. Mirrors NPE +// InnertubeClientRequestInfo.java + the prepareJsonBuilder() flow in +// YoutubeParsingHelper.java:1494-1559. +// +// Wire-order matters (audit Track A §2.1). NOTE: `serde_json::Map` keeps +// insertion order only when serde_json's `preserve_order` feature is on (the +// default BTreeMap backing sorts keys), so the `.insert()` order relies on it. + +use serde_json::{json, Map, Value}; + +use crate::localization::{ContentCountry, Localization}; +use crate::youtube::constants::*; + +#[derive(Clone, Debug)] +pub struct ClientInfo { + pub client_name: String, + pub client_version: String, + pub client_id: String, + pub client_screen: Option, + pub visitor_data: Option, +} + +#[derive(Clone, Debug, Default)] +pub struct DeviceInfo { + pub platform: Option, + pub device_make: Option, + pub device_model: Option, + pub os_name: Option, + pub os_version: Option, + /// `-1` is NPE's sentinel for "not applicable" — only included in the + /// JSON when > 0. 
+ pub android_sdk_version: i32, +} + +#[derive(Clone, Debug)] +pub struct InnertubeClientRequestInfo { + pub client_info: ClientInfo, + pub device_info: DeviceInfo, +} + +impl InnertubeClientRequestInfo { + pub fn of_web_client() -> Self { + Self { + client_info: ClientInfo { + client_name: WEB_CLIENT_NAME.into(), + client_version: WEB_HARDCODED_CLIENT_VERSION.into(), + client_id: WEB_CLIENT_ID.into(), + client_screen: Some(WATCH_CLIENT_SCREEN.into()), + visitor_data: None, + }, + device_info: DeviceInfo { + platform: Some(DESKTOP_CLIENT_PLATFORM.into()), + android_sdk_version: -1, + ..Default::default() + }, + } + } + + pub fn of_web_embedded_player_client() -> Self { + Self { + client_info: ClientInfo { + client_name: WEB_EMBEDDED_CLIENT_NAME.into(), + client_version: WEB_EMBEDDED_CLIENT_VERSION.into(), + client_id: WEB_EMBEDDED_CLIENT_ID.into(), + client_screen: Some(EMBED_CLIENT_SCREEN.into()), + visitor_data: None, + }, + device_info: DeviceInfo { + platform: Some(DESKTOP_CLIENT_PLATFORM.into()), + android_sdk_version: -1, + ..Default::default() + }, + } + } + + pub fn of_web_music_analytics_charts_client() -> Self { + // NPE deliberately omits clientScreen + platform for charts. 
+ Self { + client_info: ClientInfo { + client_name: WEB_MUSIC_ANALYTICS_CLIENT_NAME.into(), + client_version: WEB_MUSIC_ANALYTICS_CLIENT_VERSION.into(), + client_id: WEB_MUSIC_ANALYTICS_CLIENT_ID.into(), + client_screen: None, + visitor_data: None, + }, + device_info: DeviceInfo { + android_sdk_version: -1, + ..Default::default() + }, + } + } + + pub fn of_android_client() -> Self { + Self { + client_info: ClientInfo { + client_name: ANDROID_CLIENT_NAME.into(), + client_version: ANDROID_CLIENT_VERSION.into(), + client_id: ANDROID_CLIENT_ID.into(), + client_screen: Some(WATCH_CLIENT_SCREEN.into()), + visitor_data: None, + }, + device_info: DeviceInfo { + platform: Some(MOBILE_CLIENT_PLATFORM.into()), + os_name: Some("Android".into()), + os_version: Some(ANDROID_OS_VERSION.into()), + android_sdk_version: ANDROID_SDK_VERSION as i32, + ..Default::default() + }, + } + } + + pub fn of_ios_client() -> Self { + Self { + client_info: ClientInfo { + client_name: IOS_CLIENT_NAME.into(), + client_version: IOS_CLIENT_VERSION.into(), + client_id: IOS_CLIENT_ID.into(), + client_screen: Some(WATCH_CLIENT_SCREEN.into()), + visitor_data: None, + }, + device_info: DeviceInfo { + platform: Some(MOBILE_CLIENT_PLATFORM.into()), + device_make: Some("Apple".into()), + device_model: Some(IOS_DEVICE_MODEL.into()), + os_name: Some("iOS".into()), + os_version: Some(IOS_OS_VERSION.into()), + android_sdk_version: -1, + }, + } + } +} + +/// Builds the InnerTube request envelope mirroring NPE prepareJsonBuilder. +/// Insertion order matches NPE's wire-order verbatim (audit Track A §2.1). 
+pub fn build_envelope( + info: &InnertubeClientRequestInfo, + localization: &Localization, + content_country: &ContentCountry, + embed_url: Option<&str>, +) -> Value { + let mut client = Map::new(); + client.insert("clientName".into(), Value::String(info.client_info.client_name.clone())); + client.insert("clientVersion".into(), Value::String(info.client_info.client_version.clone())); + if let Some(s) = &info.client_info.client_screen { + client.insert("clientScreen".into(), Value::String(s.clone())); + } + if let Some(p) = &info.device_info.platform { + client.insert("platform".into(), Value::String(p.clone())); + } + if let Some(v) = &info.client_info.visitor_data { + client.insert("visitorData".into(), Value::String(v.clone())); + } + if let Some(m) = &info.device_info.device_make { + client.insert("deviceMake".into(), Value::String(m.clone())); + } + if let Some(m) = &info.device_info.device_model { + client.insert("deviceModel".into(), Value::String(m.clone())); + } + if let Some(n) = &info.device_info.os_name { + client.insert("osName".into(), Value::String(n.clone())); + } + if let Some(v) = &info.device_info.os_version { + client.insert("osVersion".into(), Value::String(v.clone())); + } + if info.device_info.android_sdk_version > 0 { + client.insert( + "androidSdkVersion".into(), + Value::Number(info.device_info.android_sdk_version.into()), + ); + } + client.insert("hl".into(), Value::String(localization.localization_code())); + client.insert("gl".into(), Value::String(content_country.country_code().into())); + client.insert("utcOffsetMinutes".into(), Value::Number(0.into())); + + let mut context = Map::new(); + context.insert("client".into(), Value::Object(client)); + if let Some(url) = embed_url { + context.insert("thirdParty".into(), json!({ "embedUrl": url })); + } + context.insert( + "request".into(), + json!({ + "internalExperimentFlags": [], + "useSsl": true, + }), + ); + context.insert( + "user".into(), + json!({ + "lockedSafetyMode": false, + }), + 
); + + json!({ "context": Value::Object(context) }) +} + +/// Desktop fast-path envelope — mirrors NPE prepareDesktopJsonBuilder +/// (YoutubeParsingHelper.java:1044-1072). Used by search / browse / next +/// / resolve_url / comments. NO clientScreen, NO visitor_data, NO device +/// fields — also adds `originalUrl: https://www.youtube.com` (literal). +pub fn build_desktop_envelope( + localization: &Localization, + content_country: &ContentCountry, + web_client_version: &str, +) -> Value { + let mut client = Map::new(); + client.insert("hl".into(), Value::String(localization.localization_code())); + client.insert("gl".into(), Value::String(content_country.country_code().into())); + client.insert("clientName".into(), Value::String(WEB_CLIENT_NAME.into())); + client.insert("clientVersion".into(), Value::String(web_client_version.into())); + client.insert( + "originalUrl".into(), + Value::String("https://www.youtube.com".into()), + ); + client.insert("platform".into(), Value::String(DESKTOP_CLIENT_PLATFORM.into())); + client.insert("utcOffsetMinutes".into(), Value::Number(0.into())); + + json!({ + "context": { + "client": Value::Object(client), + "request": { + "internalExperimentFlags": [], + "useSsl": true, + }, + "user": { + "lockedSafetyMode": false, + } + } + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn web_client_envelope_shape() { + let info = InnertubeClientRequestInfo::of_web_client(); + let env = build_envelope( + &info, + &Localization::default(), + &ContentCountry::default(), + None, + ); + let client = &env["context"]["client"]; + assert_eq!(client["clientName"], "WEB"); + assert_eq!(client["clientVersion"], "2.20260120.01.00"); + assert_eq!(client["clientScreen"], "WATCH"); + assert_eq!(client["platform"], "DESKTOP"); + assert_eq!(client["hl"], "en-GB"); + assert_eq!(client["gl"], "GB"); + assert_eq!(client["utcOffsetMinutes"], 0); + assert!(client.get("visitorData").is_none()); + assert!(client.get("androidSdkVersion").is_none()); 
+ // thirdParty omitted when no embed_url + assert!(env["context"].get("thirdParty").is_none()); + } + + #[test] + fn android_client_envelope_shape() { + let info = InnertubeClientRequestInfo::of_android_client(); + let env = build_envelope( + &info, + &Localization::default(), + &ContentCountry::default(), + None, + ); + let client = &env["context"]["client"]; + assert_eq!(client["clientName"], "ANDROID"); + assert_eq!(client["clientVersion"], "21.03.36"); + assert_eq!(client["platform"], "MOBILE"); + assert_eq!(client["osName"], "Android"); + assert_eq!(client["osVersion"], "16"); + assert_eq!(client["androidSdkVersion"], 36); + } + + #[test] + fn ios_client_envelope_shape() { + let info = InnertubeClientRequestInfo::of_ios_client(); + let env = build_envelope( + &info, + &Localization::default(), + &ContentCountry::default(), + None, + ); + let client = &env["context"]["client"]; + assert_eq!(client["clientName"], "IOS"); + assert_eq!(client["deviceMake"], "Apple"); + assert_eq!(client["deviceModel"], "iPhone16,2"); + assert_eq!(client["osName"], "iOS"); + assert_eq!(client["osVersion"], "18.7.2.22H124"); + assert!(client.get("androidSdkVersion").is_none()); + } + + #[test] + fn charts_client_omits_platform_and_screen() { + let info = InnertubeClientRequestInfo::of_web_music_analytics_charts_client(); + let env = build_envelope( + &info, + &Localization::default(), + &ContentCountry::default(), + None, + ); + let client = &env["context"]["client"]; + assert_eq!(client["clientName"], "WEB_MUSIC_ANALYTICS"); + assert!(client.get("clientScreen").is_none()); + assert!(client.get("platform").is_none()); + } + + #[test] + fn embed_url_lands_in_third_party_block() { + let info = InnertubeClientRequestInfo::of_web_embedded_player_client(); + let env = build_envelope( + &info, + &Localization::default(), + &ContentCountry::default(), + Some("https://www.youtube.com/embed/abc"), + ); + assert_eq!( + env["context"]["thirdParty"]["embedUrl"], + 
"https://www.youtube.com/embed/abc" + ); + } + + #[test] + fn visitor_data_lands_in_client_block_when_set() { + let mut info = InnertubeClientRequestInfo::of_android_client(); + info.client_info.visitor_data = Some("Cgs1ZG1...".into()); + let env = build_envelope( + &info, + &Localization::default(), + &ContentCountry::default(), + None, + ); + assert_eq!(env["context"]["client"]["visitorData"], "Cgs1ZG1..."); + } + + #[test] + fn desktop_envelope_uses_original_url_and_no_visitor() { + let env = build_desktop_envelope( + &Localization::default(), + &ContentCountry::default(), + "2.20260120.01.00", + ); + let client = &env["context"]["client"]; + assert_eq!(client["originalUrl"], "https://www.youtube.com"); + assert_eq!(client["platform"], "DESKTOP"); + assert!(client.get("clientScreen").is_none()); + assert!(client.get("visitorData").is_none()); + } +} diff --git a/src/youtube/constants.rs b/src/youtube/constants.rs new file mode 100644 index 0000000..d70e797 --- /dev/null +++ b/src/youtube/constants.rs @@ -0,0 +1,53 @@ +// ClientsConstants — mirrors NPE services/youtube/ClientsConstants.java. +// +// Six live InnerTube clients: WEB, WEB_EMBEDDED_PLAYER, WEB_MUSIC_ANALYTICS, +// WEB_REMIX, ANDROID, IOS. NPE's tree also mentions TVHTML5 + MWEB in +// comments — not actively used; skipped per audit Track A §1.3. 
+ +pub const DESKTOP_CLIENT_PLATFORM: &str = "DESKTOP"; +pub const MOBILE_CLIENT_PLATFORM: &str = "MOBILE"; +pub const WATCH_CLIENT_SCREEN: &str = "WATCH"; +pub const EMBED_CLIENT_SCREEN: &str = "EMBED"; + +pub const WEB_CLIENT_ID: &str = "1"; +pub const WEB_CLIENT_NAME: &str = "WEB"; +pub const WEB_HARDCODED_CLIENT_VERSION: &str = "2.20260120.01.00"; + +pub const WEB_REMIX_CLIENT_ID: &str = "67"; +pub const WEB_REMIX_CLIENT_NAME: &str = "WEB_REMIX"; +pub const WEB_REMIX_HARDCODED_CLIENT_VERSION: &str = "1.20260121.03.00"; + +pub const WEB_EMBEDDED_CLIENT_ID: &str = "56"; +pub const WEB_EMBEDDED_CLIENT_NAME: &str = "WEB_EMBEDDED_PLAYER"; +pub const WEB_EMBEDDED_CLIENT_VERSION: &str = "1.20260122.01.00"; + +pub const WEB_MUSIC_ANALYTICS_CLIENT_ID: &str = "31"; +pub const WEB_MUSIC_ANALYTICS_CLIENT_NAME: &str = "WEB_MUSIC_ANALYTICS"; +pub const WEB_MUSIC_ANALYTICS_CLIENT_VERSION: &str = "2.0"; + +// iPhone 15 Pro Max — chosen explicitly for 60fps tags per ItagItem.java:26 +// note and the gist referenced in NPE source. +pub const IOS_CLIENT_ID: &str = "5"; +pub const IOS_CLIENT_NAME: &str = "IOS"; +pub const IOS_CLIENT_VERSION: &str = "21.03.2"; +pub const IOS_DEVICE_MODEL: &str = "iPhone16,2"; +pub const IOS_OS_VERSION: &str = "18.7.2.22H124"; +pub const IOS_USER_AGENT_VERSION: &str = "18_7_2"; + +pub const ANDROID_CLIENT_ID: &str = "3"; +pub const ANDROID_CLIENT_NAME: &str = "ANDROID"; +pub const ANDROID_CLIENT_VERSION: &str = "21.03.36"; + +// PARITY: NPE hard-codes androidSdkVersion=36 + osVersion=16 even though +// the User-Agent advertises Android 15. DroidGuard doesn't check the +// InnerTube context so this mismatch is intentional and not a bug. +pub const ANDROID_SDK_VERSION: u32 = 36; +pub const ANDROID_OS_VERSION: &str = "16"; + +// Base URLs (NPE YoutubeParsingHelper.java:91,96). 
+pub const YOUTUBEI_V1_URL: &str = "https://www.youtube.com/youtubei/v1/"; +pub const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1/"; +pub const YOUTUBE_MUSIC_INNERTUBE_URL: &str = "https://music.youtube.com/youtubei/v1/"; + +// All InnerTube calls carry this query suffix to shrink responses. +pub const DISABLE_PRETTY_PRINT_PARAM: &str = "?prettyPrint=false"; diff --git a/src/youtube/itag.rs b/src/youtube/itag.rs new file mode 100644 index 0000000..1a53e92 --- /dev/null +++ b/src/youtube/itag.rs @@ -0,0 +1,218 @@ +// itag → MediaFormat table. Mirrors NPE ItagItem.java:28-101 — the +// hard-coded array of 57 entries (14 combined-AV + 10 audio + 33 +// video-only). +// +// Codec column ("AV1", "VP9") is derived from response mimeType at extract +// time, NOT stored here — matches NPE's source comment ItagItem.java:26. + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +pub enum MediaFormat { + /// 3gp (legacy progressive video+audio) + V3GPP, + /// mp4 (video+audio or video-only) + Mpeg4, + /// webm (video+audio or video-only) + Webm, + /// M4A — AAC audio in mp4 container + M4A, + /// vorbis audio in webm container + Webma, + /// opus audio in webm container + WebmaOpus, +} + +impl MediaFormat { + pub fn mime(&self) -> &'static str { + match self { + MediaFormat::V3GPP => "video/3gpp", + MediaFormat::Mpeg4 => "video/mp4", + MediaFormat::Webm => "video/webm", + MediaFormat::M4A => "audio/mp4", + MediaFormat::Webma => "audio/webm", + MediaFormat::WebmaOpus => "audio/webm", + } + } + + pub fn extension(&self) -> &'static str { + match self { + MediaFormat::V3GPP => "3gp", + MediaFormat::Mpeg4 => "mp4", + MediaFormat::Webm => "webm", + MediaFormat::M4A => "m4a", + MediaFormat::Webma => "webm", + MediaFormat::WebmaOpus => "webm", + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +pub enum ItagType { + /// Legacy progressive — single stream carries both video and audio. + Video, + /// DASH audio-only. 
+ Audio, + /// DASH video-only adaptive. + VideoOnly, +} + +#[derive(Clone, Debug)] +pub struct ItagItem { + pub id: u32, + pub item_type: ItagType, + pub format: MediaFormat, + pub resolution: Option<&'static str>, + pub fps: u32, + pub avg_bitrate_kbps: Option, +} + +const fn av(id: u32, format: MediaFormat, resolution: &'static str) -> ItagItem { + ItagItem { + id, + item_type: ItagType::Video, + format, + resolution: Some(resolution), + fps: 30, + avg_bitrate_kbps: None, + } +} + +const fn audio(id: u32, format: MediaFormat, kbps: u32) -> ItagItem { + ItagItem { + id, + item_type: ItagType::Audio, + format, + resolution: None, + fps: 0, + avg_bitrate_kbps: Some(kbps), + } +} + +const fn vo(id: u32, format: MediaFormat, resolution: &'static str, fps: u32) -> ItagItem { + ItagItem { + id, + item_type: ItagType::VideoOnly, + format, + resolution: Some(resolution), + fps, + avg_bitrate_kbps: None, + } +} + +/// The 57-entry table. +pub static ITAG_TABLE: &[ItagItem] = &[ + // Progressive (video+audio combined) + av(17, MediaFormat::V3GPP, "144p"), + av(36, MediaFormat::V3GPP, "240p"), + av(18, MediaFormat::Mpeg4, "360p"), + av(34, MediaFormat::Mpeg4, "360p"), + av(35, MediaFormat::Mpeg4, "480p"), + av(59, MediaFormat::Mpeg4, "480p"), + av(78, MediaFormat::Mpeg4, "480p"), + av(22, MediaFormat::Mpeg4, "720p"), + av(37, MediaFormat::Mpeg4, "1080p"), + av(38, MediaFormat::Mpeg4, "1080p"), + av(43, MediaFormat::Webm, "360p"), + av(44, MediaFormat::Webm, "480p"), + av(45, MediaFormat::Webm, "720p"), + av(46, MediaFormat::Webm, "1080p"), + // Adaptive audio + audio(171, MediaFormat::Webma, 128), + audio(172, MediaFormat::Webma, 256), + audio(599, MediaFormat::M4A, 32), + audio(139, MediaFormat::M4A, 48), + audio(140, MediaFormat::M4A, 128), + audio(141, MediaFormat::M4A, 256), + audio(600, MediaFormat::WebmaOpus, 35), + audio(249, MediaFormat::WebmaOpus, 50), + audio(250, MediaFormat::WebmaOpus, 70), + audio(251, MediaFormat::WebmaOpus, 160), + // Adaptive video-only (MP4 
/ AVC + AV1) + vo(160, MediaFormat::Mpeg4, "144p", 30), + vo(394, MediaFormat::Mpeg4, "144p", 30), + vo(133, MediaFormat::Mpeg4, "240p", 30), + vo(395, MediaFormat::Mpeg4, "240p", 30), + vo(134, MediaFormat::Mpeg4, "360p", 30), + vo(396, MediaFormat::Mpeg4, "360p", 30), + vo(135, MediaFormat::Mpeg4, "480p", 30), + vo(212, MediaFormat::Mpeg4, "480p", 30), + vo(397, MediaFormat::Mpeg4, "480p", 30), + vo(136, MediaFormat::Mpeg4, "720p", 30), + vo(398, MediaFormat::Mpeg4, "720p", 30), + vo(298, MediaFormat::Mpeg4, "720p60", 60), + vo(137, MediaFormat::Mpeg4, "1080p", 30), + vo(399, MediaFormat::Mpeg4, "1080p", 30), + vo(299, MediaFormat::Mpeg4, "1080p60", 60), + vo(400, MediaFormat::Mpeg4, "1440p", 30), + vo(266, MediaFormat::Mpeg4, "2160p", 30), + vo(401, MediaFormat::Mpeg4, "2160p", 30), + // Adaptive video-only (WEBM / VP9) + vo(278, MediaFormat::Webm, "144p", 30), + vo(242, MediaFormat::Webm, "240p", 30), + vo(243, MediaFormat::Webm, "360p", 30), + vo(244, MediaFormat::Webm, "480p", 30), + vo(245, MediaFormat::Webm, "480p", 30), + vo(246, MediaFormat::Webm, "480p", 30), + vo(247, MediaFormat::Webm, "720p", 30), + vo(248, MediaFormat::Webm, "1080p", 30), + vo(271, MediaFormat::Webm, "1440p", 30), + vo(272, MediaFormat::Webm, "2160p", 30), + vo(302, MediaFormat::Webm, "720p60", 60), + vo(303, MediaFormat::Webm, "1080p60", 60), + vo(308, MediaFormat::Webm, "1440p60", 60), + vo(313, MediaFormat::Webm, "2160p", 30), + vo(315, MediaFormat::Webm, "2160p60", 60), +]; + +pub fn lookup(itag_id: u32) -> Option<&'static ItagItem> { + ITAG_TABLE.iter().find(|it| it.id == itag_id) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn table_has_57_entries() { + // Audit Track A §7 says "53" in prose but tallies the same 57 + // entries below. NPE source ItagItem.java has 57 distinct itag + // IDs. Matches exactly. 
+ assert_eq!(ITAG_TABLE.len(), 57); + } + + #[test] + fn itag_140_is_aac_128() { + let it = lookup(140).unwrap(); + assert_eq!(it.item_type, ItagType::Audio); + assert_eq!(it.format, MediaFormat::M4A); + assert_eq!(it.avg_bitrate_kbps, Some(128)); + } + + #[test] + fn itag_22_is_progressive_720p_mp4() { + let it = lookup(22).unwrap(); + assert_eq!(it.item_type, ItagType::Video); + assert_eq!(it.format, MediaFormat::Mpeg4); + assert_eq!(it.resolution, Some("720p")); + } + + #[test] + fn itag_315_is_2160p60_vp9_video_only() { + let it = lookup(315).unwrap(); + assert_eq!(it.item_type, ItagType::VideoOnly); + assert_eq!(it.format, MediaFormat::Webm); + assert_eq!(it.resolution, Some("2160p60")); + assert_eq!(it.fps, 60); + } + + #[test] + fn unknown_itag_returns_none() { + assert!(lookup(99999).is_none()); + } + + #[test] + fn mime_and_extension() { + assert_eq!(MediaFormat::M4A.mime(), "audio/mp4"); + assert_eq!(MediaFormat::M4A.extension(), "m4a"); + assert_eq!(MediaFormat::Webm.mime(), "video/webm"); + } +} diff --git a/src/youtube/mod.rs b/src/youtube/mod.rs index 8c25046..33cb4ec 100644 --- a/src/youtube/mod.rs +++ b/src/youtube/mod.rs @@ -1,5 +1,11 @@ -// YouTube service tree. Phase 2 lands the JS deobfuscator (the keystone -// risk per SPEC §9). Phase 3+ lands the InnerTube client matrix, itag -// table, stream extractor, search, channel, etc. +// YouTube service tree. Phase 2 landed the JS deobfuscator. Phase 3 adds +// the InnerTube client matrix, request envelope, parsing helpers, and the +// itag table. Phase 4+ will add the stream extractor, search, channel, +// playlist, kiosks. +pub mod client_request; +pub mod constants; +pub mod itag; pub mod js; +pub mod parsing; + diff --git a/src/youtube/parsing.rs b/src/youtube/parsing.rs new file mode 100644 index 0000000..65b9bf4 --- /dev/null +++ b/src/youtube/parsing.rs @@ -0,0 +1,232 @@ +// YoutubeParsingHelper-shaped helpers — mirrors NPE +// services/youtube/YoutubeParsingHelper.java. 
+// +// Currently implements: +// * consent toggle + cookie generator (set_consent_accepted, consent_cookie) +// * client-version cache + sw.js fetch fallback (web_client_version, discover_web_client_version) +// * visitor-data bootstrap via /youtubei/v1/visitor_id +// * client/origin/referer header builder +// +// PoToken integration lands in Phase 5. po_token / DroidGuard / BotGuard +// machinery is host-provided (PoTokenProvider trait). + +use once_cell::sync::Lazy; +use parking_lot::RwLock; +use regex::Regex; +use serde_json::Value; + +use crate::downloader::request::Request; +use crate::downloader::Downloader; +use crate::exceptions::ParsingError; +use crate::localization::{ContentCountry, Localization}; +use crate::newpipe::NewPipe; +use crate::youtube::client_request::{ + build_envelope, InnertubeClientRequestInfo, +}; +use crate::youtube::constants::*; + +static CONSENT_ACCEPTED: Lazy> = Lazy::new(|| RwLock::new(false)); +static CACHED_WEB_CLIENT_VERSION: Lazy>> = Lazy::new(|| RwLock::new(None)); + +pub fn set_consent_accepted(accepted: bool) { + *CONSENT_ACCEPTED.write() = accepted; +} + +pub fn is_consent_accepted() -> bool { + *CONSENT_ACCEPTED.read() +} + +/// Returns the `SOCS=` consent cookie value. EU users need +/// `CAISAiAD` (accepted) to extract mix-playlist continuations. +pub fn consent_cookie() -> &'static str { + if is_consent_accepted() { + "SOCS=CAISAiAD" + } else { + "SOCS=CAE=" + } +} + +/// Returns the cached WEB client version. Falls back to the hardcoded +/// constant if no live extraction has run. +pub fn web_client_version() -> String { + if let Some(v) = CACHED_WEB_CLIENT_VERSION.read().as_ref() { + return v.clone(); + } + WEB_HARDCODED_CLIENT_VERSION.to_string() +} + +pub fn reset_web_client_version_cache() { + *CACHED_WEB_CLIENT_VERSION.write() = None; +} + +static SW_JS_VERSION_RE: Lazy = Lazy::new(|| { + Regex::new(r#"INNERTUBE_CONTEXT_CLIENT_VERSION":\s*"([^"]+)""#).unwrap() +}); + +/// Fetches sw.js + extracts the live WEB client version. 
Caches the +/// result. Returns the cached value if already known. +pub fn discover_web_client_version() -> Result { + if let Some(v) = CACHED_WEB_CLIENT_VERSION.read().as_ref() { + return Ok(v.clone()); + } + let downloader = NewPipe::downloader() + .ok_or_else(|| ParsingError::Invalid("downloader not initialized".into()))?; + let req = Request::get("https://www.youtube.com/sw.js") + .add_header("Origin", "https://www.youtube.com") + .add_header("Referer", "https://www.youtube.com") + .build(); + let resp = downloader + .execute(req) + .map_err(|e| ParsingError::Invalid(format!("sw.js fetch: {e}")))?; + if resp.response_code() != 200 { + return Err(ParsingError::Invalid(format!( + "sw.js HTTP {}", + resp.response_code() + ))); + } + let version = SW_JS_VERSION_RE + .captures(resp.response_body()) + .and_then(|c| c.get(1)) + .map(|m| m.as_str().to_string()) + .ok_or_else(|| ParsingError::RegexMiss("INNERTUBE_CONTEXT_CLIENT_VERSION".into()))?; + *CACHED_WEB_CLIENT_VERSION.write() = Some(version.clone()); + Ok(version) +} + +/// Headers for a WEB-flavor POST (JSON content-type, client headers, +/// origin/referer, consent cookie). +pub fn youtube_post_headers() -> Vec<(String, String)> { + vec![ + ("Content-Type".into(), "application/json".into()), + ("X-YouTube-Client-Name".into(), WEB_CLIENT_ID.into()), + ("X-YouTube-Client-Version".into(), web_client_version()), + ("Origin".into(), "https://www.youtube.com".into()), + ("Referer".into(), "https://www.youtube.com".into()), + ("Cookie".into(), consent_cookie().into()), + ] +} + +/// Mobile (Android/iOS) POST headers — UA + format-version only. No +/// X-YouTube-Client-Name, no Origin/Referer, no Cookie (audit Track A §6.2). 
+pub fn mobile_post_headers(user_agent: &str) -> Vec<(String, String)> { + vec![ + ("Content-Type".into(), "application/json".into()), + ("User-Agent".into(), user_agent.into()), + ("X-Goog-Api-Format-Version".into(), "2".into()), + ] +} + +pub fn android_user_agent(country: &ContentCountry) -> String { + format!( + "com.google.android.youtube/{ANDROID_CLIENT_VERSION} (Linux; U; Android 15; {}) gzip", + country.country_code() + ) +} + +pub fn ios_user_agent(country: &ContentCountry) -> String { + format!( + "com.google.ios.youtube/{IOS_CLIENT_VERSION}({IOS_DEVICE_MODEL}; U; CPU iOS {IOS_USER_AGENT_VERSION} like Mac OS X; {})", + country.country_code() + ) +} + +/// Bootstraps a visitor_data token via `/youtubei/v1/visitor_id`. Returns +/// the value of `responseContext.visitorData` from the response. +pub fn bootstrap_visitor_data( + info: &InnertubeClientRequestInfo, + localization: &Localization, + content_country: &ContentCountry, + use_gapis_endpoint: bool, +) -> Result { + let downloader = NewPipe::downloader() + .ok_or_else(|| ParsingError::Invalid("downloader not initialized".into()))?; + let envelope = build_envelope(info, localization, content_country, None); + let body = serde_json::to_vec(&envelope)?; + + let base = if use_gapis_endpoint { + YOUTUBEI_V1_GAPIS_URL + } else { + YOUTUBEI_V1_URL + }; + let url = format!("{base}visitor_id{DISABLE_PRETTY_PRINT_PARAM}"); + + let mut req_builder = Request::post(&url, body); + for (k, v) in youtube_post_headers() { + req_builder = req_builder.add_header(&k, &v); + } + let resp = downloader + .execute(req_builder.build()) + .map_err(|e| ParsingError::Invalid(format!("visitor_id POST: {e}")))?; + if resp.response_code() != 200 { + return Err(ParsingError::Invalid(format!( + "visitor_id HTTP {}", + resp.response_code() + ))); + } + let parsed: Value = serde_json::from_str(resp.response_body())?; + parsed + .get("responseContext") + .and_then(|rc| rc.get("visitorData")) + .and_then(|v| v.as_str()) + .map(|s| 
s.to_string()) + .ok_or_else(|| ParsingError::MissingField("responseContext.visitorData".into())) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn consent_toggle_flips_cookie() { + set_consent_accepted(false); + assert_eq!(consent_cookie(), "SOCS=CAE="); + set_consent_accepted(true); + assert_eq!(consent_cookie(), "SOCS=CAISAiAD"); + set_consent_accepted(false); // reset for other tests + } + + #[test] + fn web_client_version_falls_back_to_hardcoded() { + reset_web_client_version_cache(); + assert_eq!(web_client_version(), WEB_HARDCODED_CLIENT_VERSION); + } + + #[test] + fn mobile_headers_omit_client_name_and_referer() { + let h = mobile_post_headers("ua/1.0"); + let keys: Vec<&str> = h.iter().map(|(k, _)| k.as_str()).collect(); + assert!(keys.contains(&"User-Agent")); + assert!(keys.contains(&"X-Goog-Api-Format-Version")); + assert!(!keys.contains(&"X-YouTube-Client-Name")); + assert!(!keys.contains(&"Origin")); + assert!(!keys.contains(&"Referer")); + assert!(!keys.contains(&"Cookie")); + } + + #[test] + fn web_headers_include_consent_and_client_id() { + let h = youtube_post_headers(); + let keys: Vec<&str> = h.iter().map(|(k, _)| k.as_str()).collect(); + assert!(keys.contains(&"X-YouTube-Client-Name")); + assert!(keys.contains(&"Origin")); + assert!(keys.contains(&"Cookie")); + } + + #[test] + fn android_ua_template() { + let ua = android_user_agent(&ContentCountry::new("DE")); + assert!(ua.contains("com.google.android.youtube/21.03.36")); + assert!(ua.contains("Android 15")); + assert!(ua.contains("; DE)")); + assert!(ua.ends_with(" gzip")); + } + + #[test] + fn ios_ua_template() { + let ua = ios_user_agent(&ContentCountry::new("US")); + assert!(ua.contains("com.google.ios.youtube/21.03.2")); + assert!(ua.contains("iPhone16,2")); + assert!(ua.contains("CPU iOS 18_7_2")); + assert!(ua.contains("; US)")); + } +}