Phase 3 — InnerTube + itag

Port the YT client matrix + request envelope + itag lookup table.

src/youtube/
  * constants.rs       — ClientsConstants.java verbatim. All six live
                         clients (WEB, WEB_EMBEDDED_PLAYER,
                         WEB_MUSIC_ANALYTICS, ANDROID, IOS, plus the
                         WEB_REMIX values for completeness). Base URLs
                         + prettyPrint=false suffix.
  * client_request.rs  — ClientInfo / DeviceInfo / InnertubeClientRequestInfo
                         + the 5 factory constructors NPE exposes
                         (ofWebClient, ofWebEmbeddedPlayer, ofCharts,
                         ofAndroid, ofIos). build_envelope() emits the
                         InnerTube JSON in NPE's exact insertion order;
                         build_desktop_envelope() is the WEB-fast-path
                         used by search/browse/next/resolve_url/comments.
  * itag.rs            — 57-entry itag table (14 progressive + 10 audio +
                         33 video-only). MediaFormat enum + ItagType
                         enum + ItagItem struct + lookup().
  * parsing.rs         — consent toggle + cookie generator (SOCS=CAE= /
                         SOCS=CAISAiAD), WEB client-version cache + sw.js
                         scrape, WEB/mobile header builders (mobile
                         deliberately strips X-YouTube-Client-Name +
                         Origin/Referer + Cookie per audit Track A §6.2),
                         android/ios UA templates, visitor_data bootstrap
                         POST to /youtubei/v1/visitor_id.

PARITY notes flagged in code:
  * androidSdkVersion=36 + osVersion=16 but Android-15 in UA — NPE-intentional
  * mobile clients send NO X-YouTube-Client-* headers
  * audit doc says "53 entries" but tallies + NPE source = 57 ItagItems

Tests: 62 lib unit pass (up from 43 in Phase 2). All Phase 1 + Phase 2
smoke still green. Live InnerTube POSTs (visitor_data bootstrap +
/player) deferred to Phase 4 integration.
This commit is contained in:
Kayos 2026-05-24 16:57:47 -07:00
parent 91639f26d1
commit 3014410cba
5 changed files with 863 additions and 3 deletions

View file

@ -0,0 +1,351 @@
// InnertubeClientRequestInfo + JSON envelope builder. Mirrors NPE
// InnertubeClientRequestInfo.java + the prepareJsonBuilder() flow in
// YoutubeParsingHelper.java:1494-1559.
//
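// Wire-order matters (audit Track A §2.1). `serde_json::Map` keeps
// insertion order ONLY when serde_json is built with its `preserve_order`
// feature; without it the map is a BTreeMap and keys serialize sorted.
// With the feature enabled both `json!` and `.insert()` preserve order;
// we use explicit `.insert()` calls so the intended order is obvious.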
use serde_json::{json, Map, Value};
use crate::localization::{ContentCountry, Localization};
use crate::youtube::constants::*;
/// Client-identity half of an InnerTube request context.
#[derive(Clone, Debug)]
pub struct ClientInfo {
/// Client name, e.g. `WEB` or `ANDROID`.
pub client_name: String,
/// Client version string.
pub client_version: String,
/// Numeric client id kept as a string (e.g. `"1"` for WEB).
/// NOTE(review): presumably meant for the `X-YouTube-Client-Name`
/// header — confirm against callers.
pub client_id: String,
/// Client screen (`WATCH` / `EMBED`); `None` leaves the field unset.
pub client_screen: Option<String>,
/// Visitor-data token; the factory constructors leave it `None`.
pub visitor_data: Option<String>,
}
/// Device half of an InnerTube request context. Every field is optional.
#[derive(Clone, Debug, Default)]
pub struct DeviceInfo {
/// Client platform, e.g. `DESKTOP` or `MOBILE`.
pub platform: Option<String>,
/// Device manufacturer (e.g. `Apple`).
pub device_make: Option<String>,
/// Device model identifier (e.g. `iPhone16,2`).
pub device_model: Option<String>,
/// Operating-system name (e.g. `Android`, `iOS`).
pub os_name: Option<String>,
/// Operating-system version string.
pub os_version: Option<String>,
/// `-1` is NPE's sentinel for "not applicable" — only included in the
/// JSON when > 0. Note that `Default` yields `0`, which is equally
/// treated as "absent" by that `> 0` rule.
pub android_sdk_version: i32,
}
/// Full description of one InnerTube client: who it claims to be
/// (`client_info`) and what device it claims to run on (`device_info`).
#[derive(Clone, Debug)]
pub struct InnertubeClientRequestInfo {
/// Client name / version / id / screen / visitor data.
pub client_info: ClientInfo,
/// Platform, make, model, OS and Android SDK level.
pub device_info: DeviceInfo,
}
impl InnertubeClientRequestInfo {
pub fn of_web_client() -> Self {
Self {
client_info: ClientInfo {
client_name: WEB_CLIENT_NAME.into(),
client_version: WEB_HARDCODED_CLIENT_VERSION.into(),
client_id: WEB_CLIENT_ID.into(),
client_screen: Some(WATCH_CLIENT_SCREEN.into()),
visitor_data: None,
},
device_info: DeviceInfo {
platform: Some(DESKTOP_CLIENT_PLATFORM.into()),
android_sdk_version: -1,
..Default::default()
},
}
}
pub fn of_web_embedded_player_client() -> Self {
Self {
client_info: ClientInfo {
client_name: WEB_EMBEDDED_CLIENT_NAME.into(),
client_version: WEB_EMBEDDED_CLIENT_VERSION.into(),
client_id: WEB_EMBEDDED_CLIENT_ID.into(),
client_screen: Some(EMBED_CLIENT_SCREEN.into()),
visitor_data: None,
},
device_info: DeviceInfo {
platform: Some(DESKTOP_CLIENT_PLATFORM.into()),
android_sdk_version: -1,
..Default::default()
},
}
}
pub fn of_web_music_analytics_charts_client() -> Self {
// NPE deliberately omits clientScreen + platform for charts.
Self {
client_info: ClientInfo {
client_name: WEB_MUSIC_ANALYTICS_CLIENT_NAME.into(),
client_version: WEB_MUSIC_ANALYTICS_CLIENT_VERSION.into(),
client_id: WEB_MUSIC_ANALYTICS_CLIENT_ID.into(),
client_screen: None,
visitor_data: None,
},
device_info: DeviceInfo {
android_sdk_version: -1,
..Default::default()
},
}
}
pub fn of_android_client() -> Self {
Self {
client_info: ClientInfo {
client_name: ANDROID_CLIENT_NAME.into(),
client_version: ANDROID_CLIENT_VERSION.into(),
client_id: ANDROID_CLIENT_ID.into(),
client_screen: Some(WATCH_CLIENT_SCREEN.into()),
visitor_data: None,
},
device_info: DeviceInfo {
platform: Some(MOBILE_CLIENT_PLATFORM.into()),
os_name: Some("Android".into()),
os_version: Some(ANDROID_OS_VERSION.into()),
android_sdk_version: ANDROID_SDK_VERSION as i32,
..Default::default()
},
}
}
pub fn of_ios_client() -> Self {
Self {
client_info: ClientInfo {
client_name: IOS_CLIENT_NAME.into(),
client_version: IOS_CLIENT_VERSION.into(),
client_id: IOS_CLIENT_ID.into(),
client_screen: Some(WATCH_CLIENT_SCREEN.into()),
visitor_data: None,
},
device_info: DeviceInfo {
platform: Some(MOBILE_CLIENT_PLATFORM.into()),
device_make: Some("Apple".into()),
device_model: Some(IOS_DEVICE_MODEL.into()),
os_name: Some("iOS".into()),
os_version: Some(IOS_OS_VERSION.into()),
android_sdk_version: -1,
},
}
}
}
/// Builds the InnerTube request envelope mirroring NPE prepareJsonBuilder.
///
/// Keys are inserted in NPE's wire order (audit Track A §2.1):
/// `clientName`, `clientVersion`, then the optional `clientScreen`,
/// `platform`, `visitorData`, `deviceMake`, `deviceModel`, `osName`,
/// `osVersion`, `androidSdkVersion` (only when > 0), followed by `hl`,
/// `gl` and `utcOffsetMinutes`. The `context` object then carries
/// `client`, an optional `thirdParty.embedUrl`, `request` and `user`.
///
/// NOTE(review): that order only survives serialization when serde_json is
/// compiled with the `preserve_order` feature — confirm in Cargo.toml.
pub fn build_envelope(
    info: &InnertubeClientRequestInfo,
    localization: &Localization,
    content_country: &ContentCountry,
    embed_url: Option<&str>,
) -> Value {
    let identity = &info.client_info;
    let device = &info.device_info;

    let mut client = Map::new();
    client.insert(
        "clientName".to_owned(),
        Value::String(identity.client_name.clone()),
    );
    client.insert(
        "clientVersion".to_owned(),
        Value::String(identity.client_version.clone()),
    );

    // Optional string fields, listed in the exact order they must appear.
    let optional_fields = [
        ("clientScreen", &identity.client_screen),
        ("platform", &device.platform),
        ("visitorData", &identity.visitor_data),
        ("deviceMake", &device.device_make),
        ("deviceModel", &device.device_model),
        ("osName", &device.os_name),
        ("osVersion", &device.os_version),
    ];
    for (key, field) in optional_fields {
        if let Some(text) = field {
            client.insert(key.to_owned(), Value::String(text.clone()));
        }
    }

    // Zero and the -1 sentinel both mean "no SDK level to report".
    let sdk_level = device.android_sdk_version;
    if sdk_level > 0 {
        client.insert("androidSdkVersion".to_owned(), Value::Number(sdk_level.into()));
    }

    client.insert("hl".to_owned(), Value::String(localization.localization_code()));
    client.insert("gl".to_owned(), Value::String(content_country.country_code().into()));
    client.insert("utcOffsetMinutes".to_owned(), Value::Number(0.into()));

    let mut context = Map::new();
    context.insert("client".to_owned(), Value::Object(client));
    if let Some(url) = embed_url {
        context.insert("thirdParty".to_owned(), json!({ "embedUrl": url }));
    }
    let request_block = json!({
        "internalExperimentFlags": [],
        "useSsl": true,
    });
    context.insert("request".to_owned(), request_block);
    let user_block = json!({
        "lockedSafetyMode": false,
    });
    context.insert("user".to_owned(), user_block);

    let mut root = Map::new();
    root.insert("context".to_owned(), Value::Object(context));
    Value::Object(root)
}
/// Desktop fast-path envelope — mirrors NPE prepareDesktopJsonBuilder
/// (YoutubeParsingHelper.java:1044-1072). Used by search / browse / next
/// / resolve_url / comments.
///
/// The client block holds, in this order: `hl`, `gl`, `clientName` (always
/// WEB), `clientVersion` (`web_client_version`), the literal
/// `originalUrl: https://www.youtube.com`, `platform` (DESKTOP) and
/// `utcOffsetMinutes: 0`. There is NO clientScreen, NO visitor_data and NO
/// device field.
pub fn build_desktop_envelope(
    localization: &Localization,
    content_country: &ContentCountry,
    web_client_version: &str,
) -> Value {
    let client_fields = [
        ("hl", Value::String(localization.localization_code())),
        ("gl", Value::String(content_country.country_code().into())),
        ("clientName", Value::String(WEB_CLIENT_NAME.to_owned())),
        ("clientVersion", Value::String(web_client_version.to_owned())),
        ("originalUrl", Value::String("https://www.youtube.com".to_owned())),
        ("platform", Value::String(DESKTOP_CLIENT_PLATFORM.to_owned())),
        ("utcOffsetMinutes", Value::Number(0.into())),
    ];
    let mut client = Map::new();
    for (key, value) in client_fields {
        client.insert(key.to_owned(), value);
    }

    let mut context = Map::new();
    context.insert("client".to_owned(), Value::Object(client));
    context.insert(
        "request".to_owned(),
        json!({
            "internalExperimentFlags": [],
            "useSsl": true,
        }),
    );
    context.insert(
        "user".to_owned(),
        json!({
            "lockedSafetyMode": false,
        }),
    );

    let mut root = Map::new();
    root.insert("context".to_owned(), Value::Object(context));
    Value::Object(root)
}
// Shape tests for the two envelope builders. They check which keys are
// present/absent and their values; they do not check key ORDER.
#[cfg(test)]
mod tests {
use super::*;
// WEB: screen + platform present, no visitor data / SDK level / thirdParty.
#[test]
fn web_client_envelope_shape() {
let info = InnertubeClientRequestInfo::of_web_client();
let env = build_envelope(
&info,
&Localization::default(),
&ContentCountry::default(),
None,
);
let client = &env["context"]["client"];
assert_eq!(client["clientName"], "WEB");
assert_eq!(client["clientVersion"], "2.20260120.01.00");
assert_eq!(client["clientScreen"], "WATCH");
assert_eq!(client["platform"], "DESKTOP");
assert_eq!(client["hl"], "en-GB");
assert_eq!(client["gl"], "GB");
assert_eq!(client["utcOffsetMinutes"], 0);
assert!(client.get("visitorData").is_none());
assert!(client.get("androidSdkVersion").is_none());
// thirdParty omitted when no embed_url
assert!(env["context"].get("thirdParty").is_none());
}
// ANDROID: OS fields and the numeric SDK level are emitted.
#[test]
fn android_client_envelope_shape() {
let info = InnertubeClientRequestInfo::of_android_client();
let env = build_envelope(
&info,
&Localization::default(),
&ContentCountry::default(),
None,
);
let client = &env["context"]["client"];
assert_eq!(client["clientName"], "ANDROID");
assert_eq!(client["clientVersion"], "21.03.36");
assert_eq!(client["platform"], "MOBILE");
assert_eq!(client["osName"], "Android");
assert_eq!(client["osVersion"], "16");
assert_eq!(client["androidSdkVersion"], 36);
}
// IOS: make/model/OS emitted; the -1 SDK sentinel must not leak out.
#[test]
fn ios_client_envelope_shape() {
let info = InnertubeClientRequestInfo::of_ios_client();
let env = build_envelope(
&info,
&Localization::default(),
&ContentCountry::default(),
None,
);
let client = &env["context"]["client"];
assert_eq!(client["clientName"], "IOS");
assert_eq!(client["deviceMake"], "Apple");
assert_eq!(client["deviceModel"], "iPhone16,2");
assert_eq!(client["osName"], "iOS");
assert_eq!(client["osVersion"], "18.7.2.22H124");
assert!(client.get("androidSdkVersion").is_none());
}
// Charts client carries neither clientScreen nor platform.
#[test]
fn charts_client_omits_platform_and_screen() {
let info = InnertubeClientRequestInfo::of_web_music_analytics_charts_client();
let env = build_envelope(
&info,
&Localization::default(),
&ContentCountry::default(),
None,
);
let client = &env["context"]["client"];
assert_eq!(client["clientName"], "WEB_MUSIC_ANALYTICS");
assert!(client.get("clientScreen").is_none());
assert!(client.get("platform").is_none());
}
// A supplied embed URL ends up under context.thirdParty.embedUrl.
#[test]
fn embed_url_lands_in_third_party_block() {
let info = InnertubeClientRequestInfo::of_web_embedded_player_client();
let env = build_envelope(
&info,
&Localization::default(),
&ContentCountry::default(),
Some("https://www.youtube.com/embed/abc"),
);
assert_eq!(
env["context"]["thirdParty"]["embedUrl"],
"https://www.youtube.com/embed/abc"
);
}
// visitor_data set on ClientInfo is copied into the client block.
#[test]
fn visitor_data_lands_in_client_block_when_set() {
let mut info = InnertubeClientRequestInfo::of_android_client();
info.client_info.visitor_data = Some("Cgs1ZG1...".into());
let env = build_envelope(
&info,
&Localization::default(),
&ContentCountry::default(),
None,
);
assert_eq!(env["context"]["client"]["visitorData"], "Cgs1ZG1...");
}
// Desktop fast-path: originalUrl + platform, never screen or visitor data.
#[test]
fn desktop_envelope_uses_original_url_and_no_visitor() {
let env = build_desktop_envelope(
&Localization::default(),
&ContentCountry::default(),
"2.20260120.01.00",
);
let client = &env["context"]["client"];
assert_eq!(client["originalUrl"], "https://www.youtube.com");
assert_eq!(client["platform"], "DESKTOP");
assert!(client.get("clientScreen").is_none());
assert!(client.get("visitorData").is_none());
}
}

53
src/youtube/constants.rs Normal file
View file

@ -0,0 +1,53 @@
// ClientsConstants — mirrors NPE services/youtube/ClientsConstants.java.
//
// Six live InnerTube clients: WEB, WEB_EMBEDDED_PLAYER, WEB_MUSIC_ANALYTICS,
// WEB_REMIX, ANDROID, IOS. NPE's tree also mentions TVHTML5 + MWEB in
// comments — not actively used; skipped per audit Track A §1.3.
// Platform names.
pub const DESKTOP_CLIENT_PLATFORM: &str = "DESKTOP";
pub const MOBILE_CLIENT_PLATFORM: &str = "MOBILE";
// Client screen names.
pub const WATCH_CLIENT_SCREEN: &str = "WATCH";
pub const EMBED_CLIENT_SCREEN: &str = "EMBED";
// Per-client groups below: numeric id (kept as a string), client name,
// client version.
// WEB.
pub const WEB_CLIENT_ID: &str = "1";
pub const WEB_CLIENT_NAME: &str = "WEB";
pub const WEB_HARDCODED_CLIENT_VERSION: &str = "2.20260120.01.00";
// WEB_REMIX.
pub const WEB_REMIX_CLIENT_ID: &str = "67";
pub const WEB_REMIX_CLIENT_NAME: &str = "WEB_REMIX";
pub const WEB_REMIX_HARDCODED_CLIENT_VERSION: &str = "1.20260121.03.00";
// WEB_EMBEDDED_PLAYER.
pub const WEB_EMBEDDED_CLIENT_ID: &str = "56";
pub const WEB_EMBEDDED_CLIENT_NAME: &str = "WEB_EMBEDDED_PLAYER";
pub const WEB_EMBEDDED_CLIENT_VERSION: &str = "1.20260122.01.00";
// WEB_MUSIC_ANALYTICS.
pub const WEB_MUSIC_ANALYTICS_CLIENT_ID: &str = "31";
pub const WEB_MUSIC_ANALYTICS_CLIENT_NAME: &str = "WEB_MUSIC_ANALYTICS";
pub const WEB_MUSIC_ANALYTICS_CLIENT_VERSION: &str = "2.0";
// IOS.
// iPhone 15 Pro Max — chosen explicitly for 60fps tags per ItagItem.java:26
// note and the gist referenced in NPE source.
pub const IOS_CLIENT_ID: &str = "5";
pub const IOS_CLIENT_NAME: &str = "IOS";
pub const IOS_CLIENT_VERSION: &str = "21.03.2";
pub const IOS_DEVICE_MODEL: &str = "iPhone16,2";
// Full OS version string (dotted, with a trailing build component).
pub const IOS_OS_VERSION: &str = "18.7.2.22H124";
// Underscore-separated iOS version, the form used in a User-Agent string.
pub const IOS_USER_AGENT_VERSION: &str = "18_7_2";
// ANDROID.
pub const ANDROID_CLIENT_ID: &str = "3";
pub const ANDROID_CLIENT_NAME: &str = "ANDROID";
pub const ANDROID_CLIENT_VERSION: &str = "21.03.36";
// PARITY: NPE hard-codes androidSdkVersion=36 + osVersion=16 even though
// the User-Agent advertises Android 15. DroidGuard doesn't check the
// InnerTube context so this mismatch is intentional and not a bug.
pub const ANDROID_SDK_VERSION: u32 = 36;
pub const ANDROID_OS_VERSION: &str = "16";
// Base URLs (NPE YoutubeParsingHelper.java:91,96). Each ends with a slash,
// so an endpoint name is appended directly.
pub const YOUTUBEI_V1_URL: &str = "https://www.youtube.com/youtubei/v1/";
pub const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1/";
pub const YOUTUBE_MUSIC_INNERTUBE_URL: &str = "https://music.youtube.com/youtubei/v1/";
// All InnerTube calls carry this query suffix to shrink responses.
// Includes the leading `?`, so it must be the first query parameter.
pub const DISABLE_PRETTY_PRINT_PARAM: &str = "?prettyPrint=false";

218
src/youtube/itag.rs Normal file
View file

@ -0,0 +1,218 @@
// itag → MediaFormat table. Mirrors NPE ItagItem.java:28-101 — the
// hard-coded array of 57 entries (14 combined-AV + 10 audio + 33
// video-only).
//
// Codec column ("AV1", "VP9") is derived from response mimeType at extract
// time, NOT stored here — matches NPE's source comment ItagItem.java:26.
/// Container / delivery format of a stream. Several variants share a
/// container and differ only in being audio-only or in the audio codec.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum MediaFormat {
/// 3gp (legacy progressive video+audio)
V3GPP,
/// mp4 (video+audio or video-only)
Mpeg4,
/// webm (video+audio or video-only)
Webm,
/// M4A — AAC audio in mp4 container
M4A,
/// vorbis audio in webm container
Webma,
/// opus audio in webm container
WebmaOpus,
}
impl MediaFormat {
pub fn mime(&self) -> &'static str {
match self {
MediaFormat::V3GPP => "video/3gpp",
MediaFormat::Mpeg4 => "video/mp4",
MediaFormat::Webm => "video/webm",
MediaFormat::M4A => "audio/mp4",
MediaFormat::Webma => "audio/webm",
MediaFormat::WebmaOpus => "audio/webm",
}
}
pub fn extension(&self) -> &'static str {
match self {
MediaFormat::V3GPP => "3gp",
MediaFormat::Mpeg4 => "mp4",
MediaFormat::Webm => "webm",
MediaFormat::M4A => "m4a",
MediaFormat::Webma => "webm",
MediaFormat::WebmaOpus => "webm",
}
}
}
/// Which kind of stream an itag describes.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum ItagType {
/// Legacy progressive — single stream carries both video and audio.
Video,
/// DASH audio-only.
Audio,
/// DASH video-only adaptive.
VideoOnly,
}
/// One row of the itag table: static facts known about an itag id.
#[derive(Clone, Debug)]
pub struct ItagItem {
/// The numeric itag id.
pub id: u32,
/// Progressive, audio-only or video-only.
pub item_type: ItagType,
/// Container format.
pub format: MediaFormat,
/// Resolution label such as `"720p"` or `"1080p60"`; `None` when the
/// entry has no resolution.
pub resolution: Option<&'static str>,
/// Frames per second.
pub fps: u32,
/// Average bitrate in kbit/s, when one is recorded for the entry.
pub avg_bitrate_kbps: Option<u32>,
}
/// Table-row constructor for a progressive (video+audio) itag: type
/// `Video`, fixed 30 fps, no bitrate.
const fn av(id: u32, format: MediaFormat, resolution: &'static str) -> ItagItem {
    let item_type = ItagType::Video;
    let resolution = Some(resolution);
    ItagItem {
        id,
        item_type,
        format,
        resolution,
        fps: 30,
        avg_bitrate_kbps: None,
    }
}
/// Table-row constructor for an audio-only itag: type `Audio`, no
/// resolution, fps 0, average bitrate `kbps`.
const fn audio(id: u32, format: MediaFormat, kbps: u32) -> ItagItem {
    let item_type = ItagType::Audio;
    let avg_bitrate_kbps = Some(kbps);
    ItagItem {
        id,
        item_type,
        format,
        resolution: None,
        fps: 0,
        avg_bitrate_kbps,
    }
}
/// Table-row constructor for a video-only itag: type `VideoOnly`, caller
/// supplied resolution label and fps, no bitrate.
const fn vo(id: u32, format: MediaFormat, resolution: &'static str, fps: u32) -> ItagItem {
    let item_type = ItagType::VideoOnly;
    let resolution = Some(resolution);
    ItagItem {
        id,
        item_type,
        format,
        resolution,
        fps,
        avg_bitrate_kbps: None,
    }
}
/// The 57-entry table: 14 progressive + 10 audio-only + 33 video-only
/// (18 MP4 + 15 WebM) rows.
pub static ITAG_TABLE: &[ItagItem] = &[
// Progressive (video+audio combined)
av(17, MediaFormat::V3GPP, "144p"),
av(36, MediaFormat::V3GPP, "240p"),
av(18, MediaFormat::Mpeg4, "360p"),
av(34, MediaFormat::Mpeg4, "360p"),
av(35, MediaFormat::Mpeg4, "480p"),
av(59, MediaFormat::Mpeg4, "480p"),
av(78, MediaFormat::Mpeg4, "480p"),
av(22, MediaFormat::Mpeg4, "720p"),
av(37, MediaFormat::Mpeg4, "1080p"),
av(38, MediaFormat::Mpeg4, "1080p"),
av(43, MediaFormat::Webm, "360p"),
av(44, MediaFormat::Webm, "480p"),
av(45, MediaFormat::Webm, "720p"),
av(46, MediaFormat::Webm, "1080p"),
// Adaptive audio
audio(171, MediaFormat::Webma, 128),
audio(172, MediaFormat::Webma, 256),
audio(599, MediaFormat::M4A, 32),
audio(139, MediaFormat::M4A, 48),
audio(140, MediaFormat::M4A, 128),
audio(141, MediaFormat::M4A, 256),
audio(600, MediaFormat::WebmaOpus, 35),
audio(249, MediaFormat::WebmaOpus, 50),
audio(250, MediaFormat::WebmaOpus, 70),
audio(251, MediaFormat::WebmaOpus, 160),
// Adaptive video-only (MP4 / AVC + AV1)
vo(160, MediaFormat::Mpeg4, "144p", 30),
vo(394, MediaFormat::Mpeg4, "144p", 30),
vo(133, MediaFormat::Mpeg4, "240p", 30),
vo(395, MediaFormat::Mpeg4, "240p", 30),
vo(134, MediaFormat::Mpeg4, "360p", 30),
vo(396, MediaFormat::Mpeg4, "360p", 30),
vo(135, MediaFormat::Mpeg4, "480p", 30),
vo(212, MediaFormat::Mpeg4, "480p", 30),
vo(397, MediaFormat::Mpeg4, "480p", 30),
vo(136, MediaFormat::Mpeg4, "720p", 30),
vo(398, MediaFormat::Mpeg4, "720p", 30),
vo(298, MediaFormat::Mpeg4, "720p60", 60),
vo(137, MediaFormat::Mpeg4, "1080p", 30),
vo(399, MediaFormat::Mpeg4, "1080p", 30),
vo(299, MediaFormat::Mpeg4, "1080p60", 60),
vo(400, MediaFormat::Mpeg4, "1440p", 30),
vo(266, MediaFormat::Mpeg4, "2160p", 30),
vo(401, MediaFormat::Mpeg4, "2160p", 30),
// Adaptive video-only (WEBM / VP9)
vo(278, MediaFormat::Webm, "144p", 30),
vo(242, MediaFormat::Webm, "240p", 30),
vo(243, MediaFormat::Webm, "360p", 30),
vo(244, MediaFormat::Webm, "480p", 30),
vo(245, MediaFormat::Webm, "480p", 30),
vo(246, MediaFormat::Webm, "480p", 30),
vo(247, MediaFormat::Webm, "720p", 30),
vo(248, MediaFormat::Webm, "1080p", 30),
vo(271, MediaFormat::Webm, "1440p", 30),
vo(272, MediaFormat::Webm, "2160p", 30),
vo(302, MediaFormat::Webm, "720p60", 60),
vo(303, MediaFormat::Webm, "1080p60", 60),
vo(308, MediaFormat::Webm, "1440p60", 60),
vo(313, MediaFormat::Webm, "2160p", 30),
vo(315, MediaFormat::Webm, "2160p60", 60),
];
/// Returns the table row for `itag_id`, or `None` when the id is not in
/// `ITAG_TABLE`. The table is scanned front to back, so the first row with
/// a matching id wins.
pub fn lookup(itag_id: u32) -> Option<&'static ItagItem> {
    for entry in ITAG_TABLE {
        if entry.id == itag_id {
            return Some(entry);
        }
    }
    None
}
// Spot checks on the itag table: total size, one row of each stream kind,
// the miss path of `lookup`, and the MediaFormat string helpers.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn table_has_57_entries() {
// Audit Track A §7 says "53" in prose but tallies the same 57
// entries below. NPE source ItagItem.java has 57 distinct itag
// IDs. Matches exactly.
assert_eq!(ITAG_TABLE.len(), 57);
}
// Audio row: type, container and bitrate.
#[test]
fn itag_140_is_aac_128() {
let it = lookup(140).unwrap();
assert_eq!(it.item_type, ItagType::Audio);
assert_eq!(it.format, MediaFormat::M4A);
assert_eq!(it.avg_bitrate_kbps, Some(128));
}
// Progressive row: type, container and resolution label.
#[test]
fn itag_22_is_progressive_720p_mp4() {
let it = lookup(22).unwrap();
assert_eq!(it.item_type, ItagType::Video);
assert_eq!(it.format, MediaFormat::Mpeg4);
assert_eq!(it.resolution, Some("720p"));
}
// Video-only row with an explicit 60 fps.
#[test]
fn itag_315_is_2160p60_vp9_video_only() {
let it = lookup(315).unwrap();
assert_eq!(it.item_type, ItagType::VideoOnly);
assert_eq!(it.format, MediaFormat::Webm);
assert_eq!(it.resolution, Some("2160p60"));
assert_eq!(it.fps, 60);
}
// Ids missing from the table yield None rather than panicking.
#[test]
fn unknown_itag_returns_none() {
assert!(lookup(99999).is_none());
}
#[test]
fn mime_and_extension() {
assert_eq!(MediaFormat::M4A.mime(), "audio/mp4");
assert_eq!(MediaFormat::M4A.extension(), "m4a");
assert_eq!(MediaFormat::Webm.mime(), "video/webm");
}
}

View file

@ -1,5 +1,11 @@
// YouTube service tree. Phase 2 lands the JS deobfuscator (the keystone
// risk per SPEC §9). Phase 3+ lands the InnerTube client matrix, itag
// table, stream extractor, search, channel, etc.
// YouTube service tree. Phase 2 landed the JS deobfuscator. Phase 3 adds
// the InnerTube client matrix, request envelope, parsing helpers, and the
// itag table. Phase 4+ will add the stream extractor, search, channel,
// playlist, kiosks.
pub mod client_request;
pub mod constants;
pub mod itag;
pub mod js;
pub mod parsing;

232
src/youtube/parsing.rs Normal file
View file

@ -0,0 +1,232 @@
// YoutubeParsingHelper-shaped helpers — mirrors NPE
// services/youtube/YoutubeParsingHelper.java.
//
// Currently implements:
// * consent toggle + cookie generator (set_consent_accepted, consent_cookie)
// * client-version cache + sw.js fetch fallback (web_client_version,
//   discover_web_client_version)
// * visitor-data bootstrap via /youtubei/v1/visitor_id
// * client/origin/referer header builder
//
// PoToken integration lands in Phase 5. po_token / DroidGuard / BotGuard
// machinery is host-provided (PoTokenProvider trait).
use once_cell::sync::Lazy;
use parking_lot::RwLock;
use regex::Regex;
use serde_json::Value;
use crate::downloader::request::Request;
use crate::downloader::Downloader;
use crate::exceptions::ParsingError;
use crate::localization::{ContentCountry, Localization};
use crate::newpipe::NewPipe;
use crate::youtube::client_request::{
build_envelope, InnertubeClientRequestInfo,
};
use crate::youtube::constants::*;
// Process-wide consent flag; starts out `false`.
static CONSENT_ACCEPTED: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
// Process-wide cache of the WEB client version; starts out `None`.
static CACHED_WEB_CLIENT_VERSION: Lazy<RwLock<Option<String>>> = Lazy::new(|| RwLock::new(None));
/// Stores the process-wide consent flag that `consent_cookie` reads.
pub fn set_consent_accepted(accepted: bool) {
    let mut flag = CONSENT_ACCEPTED.write();
    *flag = accepted;
}
/// Reads the process-wide consent flag.
pub fn is_consent_accepted() -> bool {
    let flag = CONSENT_ACCEPTED.read();
    *flag
}
/// Returns the `SOCS=` consent cookie matching the current consent flag:
/// `SOCS=CAISAiAD` when accepted, `SOCS=CAE=` otherwise. EU users need the
/// accepted form to extract mix-playlist continuations.
pub fn consent_cookie() -> &'static str {
    const ACCEPTED: &str = "SOCS=CAISAiAD";
    const NOT_ACCEPTED: &str = "SOCS=CAE=";

    if !is_consent_accepted() {
        return NOT_ACCEPTED;
    }
    ACCEPTED
}
/// Returns the cached WEB client version, or the hard-coded
/// `WEB_HARDCODED_CLIENT_VERSION` when nothing has been cached yet. Never
/// touches the network.
pub fn web_client_version() -> String {
    let cached = CACHED_WEB_CLIENT_VERSION.read();
    match cached.as_deref() {
        Some(version) => version.to_owned(),
        None => WEB_HARDCODED_CLIENT_VERSION.to_owned(),
    }
}
/// Empties the WEB client-version cache, so `web_client_version` falls back
/// to the hard-coded value until a new version is cached.
pub fn reset_web_client_version_cache() {
    let mut slot = CACHED_WEB_CLIENT_VERSION.write();
    *slot = None;
}
// Captures the quoted value that follows the key
// `INNERTUBE_CONTEXT_CLIENT_VERSION":` (optional whitespace after the
// colon). Group 1 is the version string without its quotes. The pattern is
// a fixed literal, so the `unwrap()` can only fail on a programming error.
static SW_JS_VERSION_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"INNERTUBE_CONTEXT_CLIENT_VERSION":\s*"([^"]+)""#).unwrap()
});
/// Fetches sw.js + extracts the live WEB client version, then caches it.
/// Returns the cached value without any network access if one is already
/// known.
///
/// # Errors
/// * `ParsingError::Invalid` — no downloader registered, the request
///   failed, or the response status was not 200.
/// * `ParsingError::RegexMiss` — the body has no
///   `INNERTUBE_CONTEXT_CLIENT_VERSION` entry.
pub fn discover_web_client_version() -> Result<String, ParsingError> {
    let already_known = CACHED_WEB_CLIENT_VERSION.read().as_ref().cloned();
    if let Some(known) = already_known {
        return Ok(known);
    }

    let downloader = match NewPipe::downloader() {
        Some(d) => d,
        None => return Err(ParsingError::Invalid("downloader not initialized".into())),
    };

    let request = Request::get("https://www.youtube.com/sw.js")
        .add_header("Origin", "https://www.youtube.com")
        .add_header("Referer", "https://www.youtube.com")
        .build();
    let resp = match downloader.execute(request) {
        Ok(r) => r,
        Err(e) => return Err(ParsingError::Invalid(format!("sw.js fetch: {e}"))),
    };

    let status = resp.response_code();
    if status != 200 {
        return Err(ParsingError::Invalid(format!("sw.js HTTP {}", status)));
    }

    let found = SW_JS_VERSION_RE
        .captures(resp.response_body())
        .and_then(|caps| caps.get(1));
    let version = match found {
        Some(m) => m.as_str().to_string(),
        None => {
            return Err(ParsingError::RegexMiss(
                "INNERTUBE_CONTEXT_CLIENT_VERSION".into(),
            ))
        }
    };

    *CACHED_WEB_CLIENT_VERSION.write() = Some(version.clone());
    Ok(version)
}
/// Headers for a WEB-flavor POST, in this order: JSON content-type,
/// `X-YouTube-Client-Name` (the WEB client id), `X-YouTube-Client-Version`
/// (from `web_client_version`), `Origin`, `Referer`, and the consent
/// `Cookie`.
pub fn youtube_post_headers() -> Vec<(String, String)> {
    let client_version = web_client_version();
    let cookie = consent_cookie();
    let pairs = [
        ("Content-Type", "application/json"),
        ("X-YouTube-Client-Name", WEB_CLIENT_ID),
        ("X-YouTube-Client-Version", client_version.as_str()),
        ("Origin", "https://www.youtube.com"),
        ("Referer", "https://www.youtube.com"),
        ("Cookie", cookie),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_owned(), value.to_owned()))
        .collect()
}
/// Mobile (Android/iOS) POST headers — JSON content-type, the given
/// `User-Agent`, and `X-Goog-Api-Format-Version: 2`. No
/// X-YouTube-Client-Name, no Origin/Referer, no Cookie (audit Track A §6.2).
pub fn mobile_post_headers(user_agent: &str) -> Vec<(String, String)> {
    let pairs = [
        ("Content-Type", "application/json"),
        ("User-Agent", user_agent),
        ("X-Goog-Api-Format-Version", "2"),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_owned(), value.to_owned()))
        .collect()
}
/// User-Agent string for the ANDROID client: package name, client version,
/// a fixed `Android 15` platform segment, the country code of `country`,
/// and a trailing ` gzip`.
pub fn android_user_agent(country: &ContentCountry) -> String {
    let region = country.country_code();
    format!(
        "com.google.android.youtube/{} (Linux; U; Android 15; {}) gzip",
        ANDROID_CLIENT_VERSION, region
    )
}
/// User-Agent string for the IOS client: package name, client version,
/// then (with no space before the parenthesis) device model, the
/// underscore-form iOS version and the country code of `country`.
pub fn ios_user_agent(country: &ContentCountry) -> String {
    let region = country.country_code();
    format!(
        "com.google.ios.youtube/{}({}; U; CPU iOS {} like Mac OS X; {})",
        IOS_CLIENT_VERSION, IOS_DEVICE_MODEL, IOS_USER_AGENT_VERSION, region
    )
}
/// Bootstraps a visitor_data token via `/youtubei/v1/visitor_id`. Returns
/// the value of `responseContext.visitorData` from the response.
///
/// The request body is the envelope for `info` (without an embed URL);
/// `use_gapis_endpoint` selects the googleapis base URL instead of the
/// www.youtube.com one.
///
/// NOTE(review): the request always carries `youtube_post_headers()`, i.e.
/// the WEB client id/version, Origin/Referer and the consent cookie, even
/// when `info` describes a mobile client. Confirm whether mobile clients
/// should use `mobile_post_headers` here instead.
///
/// # Errors
/// * `ParsingError::Invalid` — no downloader registered, the POST failed,
///   or the response status was not 200.
/// * `ParsingError::MissingField` — the response has no string at
///   `responseContext.visitorData`.
/// * JSON (de)serialization failures are propagated through `?`.
pub fn bootstrap_visitor_data(
    info: &InnertubeClientRequestInfo,
    localization: &Localization,
    content_country: &ContentCountry,
    use_gapis_endpoint: bool,
) -> Result<String, ParsingError> {
    let downloader = match NewPipe::downloader() {
        Some(d) => d,
        None => return Err(ParsingError::Invalid("downloader not initialized".into())),
    };

    let envelope = build_envelope(info, localization, content_country, None);
    let body = serde_json::to_vec(&envelope)?;

    let endpoint_base = match use_gapis_endpoint {
        true => YOUTUBEI_V1_GAPIS_URL,
        false => YOUTUBEI_V1_URL,
    };
    let url = format!("{}visitor_id{}", endpoint_base, DISABLE_PRETTY_PRINT_PARAM);

    let initial = Request::post(&url, body);
    let request = youtube_post_headers()
        .into_iter()
        .fold(initial, |builder, (name, value)| builder.add_header(&name, &value))
        .build();

    let resp = match downloader.execute(request) {
        Ok(r) => r,
        Err(e) => return Err(ParsingError::Invalid(format!("visitor_id POST: {e}"))),
    };
    let status = resp.response_code();
    if status != 200 {
        return Err(ParsingError::Invalid(format!("visitor_id HTTP {}", status)));
    }

    let parsed: Value = serde_json::from_str(resp.response_body())?;
    // Indexing a `Value` with a missing key yields `Null`, whose `as_str`
    // is `None`, so absent or non-string data falls into the error arm.
    match parsed["responseContext"]["visitorData"].as_str() {
        Some(token) => Ok(token.to_string()),
        None => Err(ParsingError::MissingField(
            "responseContext.visitorData".into(),
        )),
    }
}
// Offline tests only: consent cookie, version fallback, header sets and the
// two User-Agent templates. Nothing here performs a network request.
#[cfg(test)]
mod tests {
use super::*;
// NOTE: this test mutates the process-wide consent flag and restores it
// to `false` at the end.
#[test]
fn consent_toggle_flips_cookie() {
set_consent_accepted(false);
assert_eq!(consent_cookie(), "SOCS=CAE=");
set_consent_accepted(true);
assert_eq!(consent_cookie(), "SOCS=CAISAiAD");
set_consent_accepted(false); // reset for other tests
}
// With an empty cache the hard-coded version is returned.
#[test]
fn web_client_version_falls_back_to_hardcoded() {
reset_web_client_version_cache();
assert_eq!(web_client_version(), WEB_HARDCODED_CLIENT_VERSION);
}
// Mobile header set must not contain any of the WEB-only headers.
#[test]
fn mobile_headers_omit_client_name_and_referer() {
let h = mobile_post_headers("ua/1.0");
let keys: Vec<&str> = h.iter().map(|(k, _)| k.as_str()).collect();
assert!(keys.contains(&"User-Agent"));
assert!(keys.contains(&"X-Goog-Api-Format-Version"));
assert!(!keys.contains(&"X-YouTube-Client-Name"));
assert!(!keys.contains(&"Origin"));
assert!(!keys.contains(&"Referer"));
assert!(!keys.contains(&"Cookie"));
}
// Only header NAMES are checked, so the current consent flag is irrelevant.
#[test]
fn web_headers_include_consent_and_client_id() {
let h = youtube_post_headers();
let keys: Vec<&str> = h.iter().map(|(k, _)| k.as_str()).collect();
assert!(keys.contains(&"X-YouTube-Client-Name"));
assert!(keys.contains(&"Origin"));
assert!(keys.contains(&"Cookie"));
}
#[test]
fn android_ua_template() {
let ua = android_user_agent(&ContentCountry::new("DE"));
assert!(ua.contains("com.google.android.youtube/21.03.36"));
assert!(ua.contains("Android 15"));
assert!(ua.contains("; DE)"));
assert!(ua.ends_with(" gzip"));
}
#[test]
fn ios_ua_template() {
let ua = ios_user_agent(&ContentCountry::new("US"));
assert!(ua.contains("com.google.ios.youtube/21.03.2"));
assert!(ua.contains("iPhone16,2"));
assert!(ua.contains("CPU iOS 18_7_2"));
assert!(ua.contains("; US)"));
}
}