Phase 3 — InnerTube + itag
Port the YT client matrix + request envelope + itag lookup table.
src/youtube/
* constants.rs — ClientsConstants.java verbatim. All six live
clients (WEB, WEB_EMBEDDED_PLAYER,
WEB_MUSIC_ANALYTICS, ANDROID, IOS, plus the
WEB_REMIX values for completeness). Base URLs
+ prettyPrint=false suffix.
* client_request.rs — ClientInfo / DeviceInfo / InnertubeClientRequestInfo
+ the 5 factory constructors NPE exposes
(ofWebClient, ofWebEmbeddedPlayer, ofCharts,
ofAndroid, ofIos). build_envelope() emits the
InnerTube JSON in NPE's exact insertion order;
build_desktop_envelope() is the WEB-fast-path
used by search/browse/next/resolve_url/comments.
* itag.rs — 57-entry itag table (14 progressive + 10 audio +
33 video-only). MediaFormat enum + ItagType
enum + ItagItem struct + lookup().
* parsing.rs — consent toggle + cookie generator (SOCS=CAE= /
SOCS=CAISAiAD), WEB client-version cache + sw.js
scrape, WEB/mobile header builders (mobile
deliberately strips X-YouTube-Client-Name +
Origin/Referer + Cookie per audit Track A §6.2),
android/ios UA templates, visitor_data bootstrap
POST to /youtubei/v1/visitor_id.
PARITY notes flagged in code:
* androidSdkVersion=36 + osVersion=16 but Android-15 in UA — NPE-intentional
* mobile clients send NO X-YouTube-Client-* headers
* audit doc says "53 entries" but tallies + NPE source = 57 ItagItems
Tests: 62 lib unit pass (up from 43 in Phase 2). All Phase 1 + Phase 2
smoke still green. Live InnerTube POSTs (visitor_data bootstrap +
/player) deferred to Phase 4 integration.
This commit is contained in:
parent
91639f26d1
commit
3014410cba
5 changed files with 863 additions and 3 deletions
351
src/youtube/client_request.rs
Normal file
351
src/youtube/client_request.rs
Normal file
|
|
@ -0,0 +1,351 @@
|
|||
// InnertubeClientRequestInfo + JSON envelope builder. Mirrors NPE
|
||||
// InnertubeClientRequestInfo.java + the prepareJsonBuilder() flow in
|
||||
// YoutubeParsingHelper.java:1494-1559.
|
||||
//
|
||||
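// Wire-order matters (audit Track A §2.1). `serde_json::Map` (and therefore
// the `json!` macro) only keeps insertion order when serde_json is built with
// the `preserve_order` feature; without it the map is a sorted `BTreeMap`.
// We use an explicit sequence of `.insert()` calls into a `serde_json::Map`
// so the intended order is visible in the source.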
|
||||
|
||||
use serde_json::{json, Map, Value};
|
||||
|
||||
use crate::localization::{ContentCountry, Localization};
|
||||
use crate::youtube::constants::*;
|
||||
|
||||
/// Identity of the InnerTube client a request is issued as.
#[derive(Clone, Debug)]
pub struct ClientInfo {
    /// Client name; `build_envelope` writes it as `clientName`.
    pub client_name: String,
    /// Client version; `build_envelope` writes it as `clientVersion`.
    pub client_version: String,
    /// Client id kept as a string; the factory constructors fill it from the
    /// `*_CLIENT_ID` constants.
    pub client_id: String,
    /// Optional `clientScreen` value; the key is left out when `None`.
    pub client_screen: Option<String>,
    /// Optional `visitorData` token; the key is left out when `None`.
    pub visitor_data: Option<String>,
}
|
||||
|
||||
/// Device description attached to an InnerTube client context.
///
/// Each `Option` field is only written to the envelope when it is `Some`.
#[derive(Clone, Debug, Default)]
pub struct DeviceInfo {
    /// Written as `platform`.
    pub platform: Option<String>,
    /// Written as `deviceMake`.
    pub device_make: Option<String>,
    /// Written as `deviceModel`.
    pub device_model: Option<String>,
    /// Written as `osName`.
    pub os_name: Option<String>,
    /// Written as `osVersion`.
    pub os_version: Option<String>,
    /// `-1` is NPE's sentinel for "not applicable" — only included in the
    /// JSON when > 0. `Default::default()` yields `0`, which is omitted too.
    pub android_sdk_version: i32,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InnertubeClientRequestInfo {
|
||||
pub client_info: ClientInfo,
|
||||
pub device_info: DeviceInfo,
|
||||
}
|
||||
|
||||
impl InnertubeClientRequestInfo {
|
||||
pub fn of_web_client() -> Self {
|
||||
Self {
|
||||
client_info: ClientInfo {
|
||||
client_name: WEB_CLIENT_NAME.into(),
|
||||
client_version: WEB_HARDCODED_CLIENT_VERSION.into(),
|
||||
client_id: WEB_CLIENT_ID.into(),
|
||||
client_screen: Some(WATCH_CLIENT_SCREEN.into()),
|
||||
visitor_data: None,
|
||||
},
|
||||
device_info: DeviceInfo {
|
||||
platform: Some(DESKTOP_CLIENT_PLATFORM.into()),
|
||||
android_sdk_version: -1,
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn of_web_embedded_player_client() -> Self {
|
||||
Self {
|
||||
client_info: ClientInfo {
|
||||
client_name: WEB_EMBEDDED_CLIENT_NAME.into(),
|
||||
client_version: WEB_EMBEDDED_CLIENT_VERSION.into(),
|
||||
client_id: WEB_EMBEDDED_CLIENT_ID.into(),
|
||||
client_screen: Some(EMBED_CLIENT_SCREEN.into()),
|
||||
visitor_data: None,
|
||||
},
|
||||
device_info: DeviceInfo {
|
||||
platform: Some(DESKTOP_CLIENT_PLATFORM.into()),
|
||||
android_sdk_version: -1,
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn of_web_music_analytics_charts_client() -> Self {
|
||||
// NPE deliberately omits clientScreen + platform for charts.
|
||||
Self {
|
||||
client_info: ClientInfo {
|
||||
client_name: WEB_MUSIC_ANALYTICS_CLIENT_NAME.into(),
|
||||
client_version: WEB_MUSIC_ANALYTICS_CLIENT_VERSION.into(),
|
||||
client_id: WEB_MUSIC_ANALYTICS_CLIENT_ID.into(),
|
||||
client_screen: None,
|
||||
visitor_data: None,
|
||||
},
|
||||
device_info: DeviceInfo {
|
||||
android_sdk_version: -1,
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn of_android_client() -> Self {
|
||||
Self {
|
||||
client_info: ClientInfo {
|
||||
client_name: ANDROID_CLIENT_NAME.into(),
|
||||
client_version: ANDROID_CLIENT_VERSION.into(),
|
||||
client_id: ANDROID_CLIENT_ID.into(),
|
||||
client_screen: Some(WATCH_CLIENT_SCREEN.into()),
|
||||
visitor_data: None,
|
||||
},
|
||||
device_info: DeviceInfo {
|
||||
platform: Some(MOBILE_CLIENT_PLATFORM.into()),
|
||||
os_name: Some("Android".into()),
|
||||
os_version: Some(ANDROID_OS_VERSION.into()),
|
||||
android_sdk_version: ANDROID_SDK_VERSION as i32,
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn of_ios_client() -> Self {
|
||||
Self {
|
||||
client_info: ClientInfo {
|
||||
client_name: IOS_CLIENT_NAME.into(),
|
||||
client_version: IOS_CLIENT_VERSION.into(),
|
||||
client_id: IOS_CLIENT_ID.into(),
|
||||
client_screen: Some(WATCH_CLIENT_SCREEN.into()),
|
||||
visitor_data: None,
|
||||
},
|
||||
device_info: DeviceInfo {
|
||||
platform: Some(MOBILE_CLIENT_PLATFORM.into()),
|
||||
device_make: Some("Apple".into()),
|
||||
device_model: Some(IOS_DEVICE_MODEL.into()),
|
||||
os_name: Some("iOS".into()),
|
||||
os_version: Some(IOS_OS_VERSION.into()),
|
||||
android_sdk_version: -1,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the InnerTube request envelope mirroring NPE prepareJsonBuilder.
|
||||
/// Insertion order matches NPE's wire-order verbatim (audit Track A §2.1).
|
||||
pub fn build_envelope(
|
||||
info: &InnertubeClientRequestInfo,
|
||||
localization: &Localization,
|
||||
content_country: &ContentCountry,
|
||||
embed_url: Option<&str>,
|
||||
) -> Value {
|
||||
let mut client = Map::new();
|
||||
client.insert("clientName".into(), Value::String(info.client_info.client_name.clone()));
|
||||
client.insert("clientVersion".into(), Value::String(info.client_info.client_version.clone()));
|
||||
if let Some(s) = &info.client_info.client_screen {
|
||||
client.insert("clientScreen".into(), Value::String(s.clone()));
|
||||
}
|
||||
if let Some(p) = &info.device_info.platform {
|
||||
client.insert("platform".into(), Value::String(p.clone()));
|
||||
}
|
||||
if let Some(v) = &info.client_info.visitor_data {
|
||||
client.insert("visitorData".into(), Value::String(v.clone()));
|
||||
}
|
||||
if let Some(m) = &info.device_info.device_make {
|
||||
client.insert("deviceMake".into(), Value::String(m.clone()));
|
||||
}
|
||||
if let Some(m) = &info.device_info.device_model {
|
||||
client.insert("deviceModel".into(), Value::String(m.clone()));
|
||||
}
|
||||
if let Some(n) = &info.device_info.os_name {
|
||||
client.insert("osName".into(), Value::String(n.clone()));
|
||||
}
|
||||
if let Some(v) = &info.device_info.os_version {
|
||||
client.insert("osVersion".into(), Value::String(v.clone()));
|
||||
}
|
||||
if info.device_info.android_sdk_version > 0 {
|
||||
client.insert(
|
||||
"androidSdkVersion".into(),
|
||||
Value::Number(info.device_info.android_sdk_version.into()),
|
||||
);
|
||||
}
|
||||
client.insert("hl".into(), Value::String(localization.localization_code()));
|
||||
client.insert("gl".into(), Value::String(content_country.country_code().into()));
|
||||
client.insert("utcOffsetMinutes".into(), Value::Number(0.into()));
|
||||
|
||||
let mut context = Map::new();
|
||||
context.insert("client".into(), Value::Object(client));
|
||||
if let Some(url) = embed_url {
|
||||
context.insert("thirdParty".into(), json!({ "embedUrl": url }));
|
||||
}
|
||||
context.insert(
|
||||
"request".into(),
|
||||
json!({
|
||||
"internalExperimentFlags": [],
|
||||
"useSsl": true,
|
||||
}),
|
||||
);
|
||||
context.insert(
|
||||
"user".into(),
|
||||
json!({
|
||||
"lockedSafetyMode": false,
|
||||
}),
|
||||
);
|
||||
|
||||
json!({ "context": Value::Object(context) })
|
||||
}
|
||||
|
||||
/// Desktop fast-path envelope — mirrors NPE prepareDesktopJsonBuilder
|
||||
/// (YoutubeParsingHelper.java:1044-1072). Used by search / browse / next
|
||||
/// / resolve_url / comments. NO clientScreen, NO visitor_data, NO device
|
||||
/// fields — also adds `originalUrl: https://www.youtube.com` (literal).
|
||||
pub fn build_desktop_envelope(
|
||||
localization: &Localization,
|
||||
content_country: &ContentCountry,
|
||||
web_client_version: &str,
|
||||
) -> Value {
|
||||
let mut client = Map::new();
|
||||
client.insert("hl".into(), Value::String(localization.localization_code()));
|
||||
client.insert("gl".into(), Value::String(content_country.country_code().into()));
|
||||
client.insert("clientName".into(), Value::String(WEB_CLIENT_NAME.into()));
|
||||
client.insert("clientVersion".into(), Value::String(web_client_version.into()));
|
||||
client.insert(
|
||||
"originalUrl".into(),
|
||||
Value::String("https://www.youtube.com".into()),
|
||||
);
|
||||
client.insert("platform".into(), Value::String(DESKTOP_CLIENT_PLATFORM.into()));
|
||||
client.insert("utcOffsetMinutes".into(), Value::Number(0.into()));
|
||||
|
||||
json!({
|
||||
"context": {
|
||||
"client": Value::Object(client),
|
||||
"request": {
|
||||
"internalExperimentFlags": [],
|
||||
"useSsl": true,
|
||||
},
|
||||
"user": {
|
||||
"lockedSafetyMode": false,
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an envelope for `info` with default localization and country.
    fn envelope_for(info: &InnertubeClientRequestInfo, embed_url: Option<&str>) -> Value {
        build_envelope(
            info,
            &Localization::default(),
            &ContentCountry::default(),
            embed_url,
        )
    }

    #[test]
    fn web_client_envelope_shape() {
        let env = envelope_for(&InnertubeClientRequestInfo::of_web_client(), None);
        let client = &env["context"]["client"];
        assert_eq!(client["clientName"], "WEB");
        assert_eq!(client["clientVersion"], "2.20260120.01.00");
        assert_eq!(client["clientScreen"], "WATCH");
        assert_eq!(client["platform"], "DESKTOP");
        assert_eq!(client["hl"], "en-GB");
        assert_eq!(client["gl"], "GB");
        assert_eq!(client["utcOffsetMinutes"], 0);
        assert!(client.get("visitorData").is_none());
        assert!(client.get("androidSdkVersion").is_none());
        // thirdParty omitted when no embed_url
        assert!(env["context"].get("thirdParty").is_none());
    }

    #[test]
    fn android_client_envelope_shape() {
        let env = envelope_for(&InnertubeClientRequestInfo::of_android_client(), None);
        let client = &env["context"]["client"];
        assert_eq!(client["clientName"], "ANDROID");
        assert_eq!(client["clientVersion"], "21.03.36");
        assert_eq!(client["platform"], "MOBILE");
        assert_eq!(client["osName"], "Android");
        assert_eq!(client["osVersion"], "16");
        assert_eq!(client["androidSdkVersion"], 36);
    }

    #[test]
    fn ios_client_envelope_shape() {
        let env = envelope_for(&InnertubeClientRequestInfo::of_ios_client(), None);
        let client = &env["context"]["client"];
        assert_eq!(client["clientName"], "IOS");
        assert_eq!(client["deviceMake"], "Apple");
        assert_eq!(client["deviceModel"], "iPhone16,2");
        assert_eq!(client["osName"], "iOS");
        assert_eq!(client["osVersion"], "18.7.2.22H124");
        assert!(client.get("androidSdkVersion").is_none());
    }

    #[test]
    fn charts_client_omits_platform_and_screen() {
        let env = envelope_for(
            &InnertubeClientRequestInfo::of_web_music_analytics_charts_client(),
            None,
        );
        let client = &env["context"]["client"];
        assert_eq!(client["clientName"], "WEB_MUSIC_ANALYTICS");
        assert!(client.get("clientScreen").is_none());
        assert!(client.get("platform").is_none());
    }

    #[test]
    fn embed_url_lands_in_third_party_block() {
        let env = envelope_for(
            &InnertubeClientRequestInfo::of_web_embedded_player_client(),
            Some("https://www.youtube.com/embed/abc"),
        );
        assert_eq!(
            env["context"]["thirdParty"]["embedUrl"],
            "https://www.youtube.com/embed/abc"
        );
    }

    #[test]
    fn visitor_data_lands_in_client_block_when_set() {
        let mut info = InnertubeClientRequestInfo::of_android_client();
        info.client_info.visitor_data = Some("Cgs1ZG1...".into());
        let env = envelope_for(&info, None);
        assert_eq!(env["context"]["client"]["visitorData"], "Cgs1ZG1...");
    }

    #[test]
    fn desktop_envelope_uses_original_url_and_no_visitor() {
        let env = build_desktop_envelope(
            &Localization::default(),
            &ContentCountry::default(),
            "2.20260120.01.00",
        );
        let client = &env["context"]["client"];
        assert_eq!(client["originalUrl"], "https://www.youtube.com");
        assert_eq!(client["platform"], "DESKTOP");
        assert!(client.get("clientScreen").is_none());
        assert!(client.get("visitorData").is_none());
    }
}
|
||||
53
src/youtube/constants.rs
Normal file
53
src/youtube/constants.rs
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
// ClientsConstants — mirrors NPE services/youtube/ClientsConstants.java.
|
||||
//
|
||||
// Six live InnerTube clients: WEB, WEB_EMBEDDED_PLAYER, WEB_MUSIC_ANALYTICS,
|
||||
// WEB_REMIX, ANDROID, IOS. NPE's tree also mentions TVHTML5 + MWEB in
|
||||
// comments — not actively used; skipped per audit Track A §1.3.
|
||||
|
||||
// Values for the `platform` and `clientScreen` context fields.
pub const DESKTOP_CLIENT_PLATFORM: &str = "DESKTOP";
pub const MOBILE_CLIENT_PLATFORM: &str = "MOBILE";
pub const WATCH_CLIENT_SCREEN: &str = "WATCH";
pub const EMBED_CLIENT_SCREEN: &str = "EMBED";

// WEB client.
pub const WEB_CLIENT_ID: &str = "1";
pub const WEB_CLIENT_NAME: &str = "WEB";
pub const WEB_HARDCODED_CLIENT_VERSION: &str = "2.20260120.01.00";

// WEB_REMIX client.
pub const WEB_REMIX_CLIENT_ID: &str = "67";
pub const WEB_REMIX_CLIENT_NAME: &str = "WEB_REMIX";
pub const WEB_REMIX_HARDCODED_CLIENT_VERSION: &str = "1.20260121.03.00";

// WEB_EMBEDDED_PLAYER client.
pub const WEB_EMBEDDED_CLIENT_ID: &str = "56";
pub const WEB_EMBEDDED_CLIENT_NAME: &str = "WEB_EMBEDDED_PLAYER";
pub const WEB_EMBEDDED_CLIENT_VERSION: &str = "1.20260122.01.00";

// WEB_MUSIC_ANALYTICS (charts) client.
pub const WEB_MUSIC_ANALYTICS_CLIENT_ID: &str = "31";
pub const WEB_MUSIC_ANALYTICS_CLIENT_NAME: &str = "WEB_MUSIC_ANALYTICS";
pub const WEB_MUSIC_ANALYTICS_CLIENT_VERSION: &str = "2.0";

// iPhone 15 Pro Max — chosen explicitly for 60fps tags per ItagItem.java:26
// note and the gist referenced in NPE source.
pub const IOS_CLIENT_ID: &str = "5";
pub const IOS_CLIENT_NAME: &str = "IOS";
pub const IOS_CLIENT_VERSION: &str = "21.03.2";
pub const IOS_DEVICE_MODEL: &str = "iPhone16,2";
pub const IOS_OS_VERSION: &str = "18.7.2.22H124";
// Same OS version in the underscore form used inside the iOS User-Agent.
pub const IOS_USER_AGENT_VERSION: &str = "18_7_2";

// ANDROID client.
pub const ANDROID_CLIENT_ID: &str = "3";
pub const ANDROID_CLIENT_NAME: &str = "ANDROID";
pub const ANDROID_CLIENT_VERSION: &str = "21.03.36";

// PARITY: NPE hard-codes androidSdkVersion=36 + osVersion=16 even though
// the User-Agent advertises Android 15. DroidGuard doesn't check the
// InnerTube context so this mismatch is intentional and not a bug.
pub const ANDROID_SDK_VERSION: u32 = 36;
pub const ANDROID_OS_VERSION: &str = "16";

// Base URLs (NPE YoutubeParsingHelper.java:91,96).
pub const YOUTUBEI_V1_URL: &str = "https://www.youtube.com/youtubei/v1/";
pub const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1/";
pub const YOUTUBE_MUSIC_INNERTUBE_URL: &str = "https://music.youtube.com/youtubei/v1/";

// All InnerTube calls carry this query suffix to shrink responses.
pub const DISABLE_PRETTY_PRINT_PARAM: &str = "?prettyPrint=false";
|
||||
218
src/youtube/itag.rs
Normal file
218
src/youtube/itag.rs
Normal file
|
|
@ -0,0 +1,218 @@
|
|||
// itag → MediaFormat table. Mirrors NPE ItagItem.java:28-101 — the
|
||||
// hard-coded array of 57 entries (14 combined-AV + 10 audio + 33
|
||||
// video-only).
|
||||
//
|
||||
// Codec column ("AV1", "VP9") is derived from response mimeType at extract
|
||||
// time, NOT stored here — matches NPE's source comment ItagItem.java:26.
|
||||
|
||||
/// Container / audio format associated with an itag.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum MediaFormat {
    /// 3gp (legacy progressive video+audio)
    V3GPP,
    /// mp4 (video+audio or video-only)
    Mpeg4,
    /// webm (video+audio or video-only)
    Webm,
    /// M4A — AAC audio in mp4 container
    M4A,
    /// vorbis audio in webm container
    Webma,
    /// opus audio in webm container
    WebmaOpus,
}

impl MediaFormat {
    /// Returns the MIME type string for this format. Both webm audio
    /// variants share `audio/webm`.
    pub fn mime(&self) -> &'static str {
        match self {
            Self::V3GPP => "video/3gpp",
            Self::Mpeg4 => "video/mp4",
            Self::Webm => "video/webm",
            Self::M4A => "audio/mp4",
            Self::Webma | Self::WebmaOpus => "audio/webm",
        }
    }

    /// Returns the file extension (without a leading dot) for this format.
    /// Every webm-based variant, audio or video, uses `webm`.
    pub fn extension(&self) -> &'static str {
        match self {
            Self::V3GPP => "3gp",
            Self::Mpeg4 => "mp4",
            Self::M4A => "m4a",
            Self::Webm | Self::Webma | Self::WebmaOpus => "webm",
        }
    }
}
|
||||
|
||||
/// Kind of stream an itag describes.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum ItagType {
    /// Legacy progressive — single stream carries both video and audio.
    Video,
    /// DASH audio-only.
    Audio,
    /// DASH video-only adaptive.
    VideoOnly,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ItagItem {
|
||||
pub id: u32,
|
||||
pub item_type: ItagType,
|
||||
pub format: MediaFormat,
|
||||
pub resolution: Option<&'static str>,
|
||||
pub fps: u32,
|
||||
pub avg_bitrate_kbps: Option<u32>,
|
||||
}
|
||||
|
||||
const fn av(id: u32, format: MediaFormat, resolution: &'static str) -> ItagItem {
|
||||
ItagItem {
|
||||
id,
|
||||
item_type: ItagType::Video,
|
||||
format,
|
||||
resolution: Some(resolution),
|
||||
fps: 30,
|
||||
avg_bitrate_kbps: None,
|
||||
}
|
||||
}
|
||||
|
||||
const fn audio(id: u32, format: MediaFormat, kbps: u32) -> ItagItem {
|
||||
ItagItem {
|
||||
id,
|
||||
item_type: ItagType::Audio,
|
||||
format,
|
||||
resolution: None,
|
||||
fps: 0,
|
||||
avg_bitrate_kbps: Some(kbps),
|
||||
}
|
||||
}
|
||||
|
||||
const fn vo(id: u32, format: MediaFormat, resolution: &'static str, fps: u32) -> ItagItem {
|
||||
ItagItem {
|
||||
id,
|
||||
item_type: ItagType::VideoOnly,
|
||||
format,
|
||||
resolution: Some(resolution),
|
||||
fps,
|
||||
avg_bitrate_kbps: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The 53-entry table.
|
||||
pub static ITAG_TABLE: &[ItagItem] = &[
|
||||
// Progressive (video+audio combined)
|
||||
av(17, MediaFormat::V3GPP, "144p"),
|
||||
av(36, MediaFormat::V3GPP, "240p"),
|
||||
av(18, MediaFormat::Mpeg4, "360p"),
|
||||
av(34, MediaFormat::Mpeg4, "360p"),
|
||||
av(35, MediaFormat::Mpeg4, "480p"),
|
||||
av(59, MediaFormat::Mpeg4, "480p"),
|
||||
av(78, MediaFormat::Mpeg4, "480p"),
|
||||
av(22, MediaFormat::Mpeg4, "720p"),
|
||||
av(37, MediaFormat::Mpeg4, "1080p"),
|
||||
av(38, MediaFormat::Mpeg4, "1080p"),
|
||||
av(43, MediaFormat::Webm, "360p"),
|
||||
av(44, MediaFormat::Webm, "480p"),
|
||||
av(45, MediaFormat::Webm, "720p"),
|
||||
av(46, MediaFormat::Webm, "1080p"),
|
||||
// Adaptive audio
|
||||
audio(171, MediaFormat::Webma, 128),
|
||||
audio(172, MediaFormat::Webma, 256),
|
||||
audio(599, MediaFormat::M4A, 32),
|
||||
audio(139, MediaFormat::M4A, 48),
|
||||
audio(140, MediaFormat::M4A, 128),
|
||||
audio(141, MediaFormat::M4A, 256),
|
||||
audio(600, MediaFormat::WebmaOpus, 35),
|
||||
audio(249, MediaFormat::WebmaOpus, 50),
|
||||
audio(250, MediaFormat::WebmaOpus, 70),
|
||||
audio(251, MediaFormat::WebmaOpus, 160),
|
||||
// Adaptive video-only (MP4 / AVC + AV1)
|
||||
vo(160, MediaFormat::Mpeg4, "144p", 30),
|
||||
vo(394, MediaFormat::Mpeg4, "144p", 30),
|
||||
vo(133, MediaFormat::Mpeg4, "240p", 30),
|
||||
vo(395, MediaFormat::Mpeg4, "240p", 30),
|
||||
vo(134, MediaFormat::Mpeg4, "360p", 30),
|
||||
vo(396, MediaFormat::Mpeg4, "360p", 30),
|
||||
vo(135, MediaFormat::Mpeg4, "480p", 30),
|
||||
vo(212, MediaFormat::Mpeg4, "480p", 30),
|
||||
vo(397, MediaFormat::Mpeg4, "480p", 30),
|
||||
vo(136, MediaFormat::Mpeg4, "720p", 30),
|
||||
vo(398, MediaFormat::Mpeg4, "720p", 30),
|
||||
vo(298, MediaFormat::Mpeg4, "720p60", 60),
|
||||
vo(137, MediaFormat::Mpeg4, "1080p", 30),
|
||||
vo(399, MediaFormat::Mpeg4, "1080p", 30),
|
||||
vo(299, MediaFormat::Mpeg4, "1080p60", 60),
|
||||
vo(400, MediaFormat::Mpeg4, "1440p", 30),
|
||||
vo(266, MediaFormat::Mpeg4, "2160p", 30),
|
||||
vo(401, MediaFormat::Mpeg4, "2160p", 30),
|
||||
// Adaptive video-only (WEBM / VP9)
|
||||
vo(278, MediaFormat::Webm, "144p", 30),
|
||||
vo(242, MediaFormat::Webm, "240p", 30),
|
||||
vo(243, MediaFormat::Webm, "360p", 30),
|
||||
vo(244, MediaFormat::Webm, "480p", 30),
|
||||
vo(245, MediaFormat::Webm, "480p", 30),
|
||||
vo(246, MediaFormat::Webm, "480p", 30),
|
||||
vo(247, MediaFormat::Webm, "720p", 30),
|
||||
vo(248, MediaFormat::Webm, "1080p", 30),
|
||||
vo(271, MediaFormat::Webm, "1440p", 30),
|
||||
vo(272, MediaFormat::Webm, "2160p", 30),
|
||||
vo(302, MediaFormat::Webm, "720p60", 60),
|
||||
vo(303, MediaFormat::Webm, "1080p60", 60),
|
||||
vo(308, MediaFormat::Webm, "1440p60", 60),
|
||||
vo(313, MediaFormat::Webm, "2160p", 30),
|
||||
vo(315, MediaFormat::Webm, "2160p60", 60),
|
||||
];
|
||||
|
||||
pub fn lookup(itag_id: u32) -> Option<&'static ItagItem> {
|
||||
ITAG_TABLE.iter().find(|it| it.id == itag_id)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn table_has_57_entries() {
        // Audit Track A §7 says "53" in prose but tallies the same 57
        // entries below. NPE source ItagItem.java has 57 distinct itag
        // IDs. Matches exactly.
        assert_eq!(ITAG_TABLE.len(), 57);
    }

    #[test]
    fn itag_ids_are_unique() {
        // `lookup` returns the first match, so a duplicated id would hide
        // the later row.
        let mut ids: Vec<u32> = ITAG_TABLE.iter().map(|it| it.id).collect();
        ids.sort_unstable();
        ids.dedup();
        assert_eq!(ids.len(), ITAG_TABLE.len());
    }

    #[test]
    fn itag_140_is_aac_128() {
        let it = lookup(140).unwrap();
        assert_eq!(it.item_type, ItagType::Audio);
        assert_eq!(it.format, MediaFormat::M4A);
        assert_eq!(it.avg_bitrate_kbps, Some(128));
    }

    #[test]
    fn itag_22_is_progressive_720p_mp4() {
        let it = lookup(22).unwrap();
        assert_eq!(it.item_type, ItagType::Video);
        assert_eq!(it.format, MediaFormat::Mpeg4);
        assert_eq!(it.resolution, Some("720p"));
    }

    #[test]
    fn itag_315_is_2160p60_vp9_video_only() {
        let it = lookup(315).unwrap();
        assert_eq!(it.item_type, ItagType::VideoOnly);
        assert_eq!(it.format, MediaFormat::Webm);
        assert_eq!(it.resolution, Some("2160p60"));
        assert_eq!(it.fps, 60);
    }

    #[test]
    fn unknown_itag_returns_none() {
        assert!(lookup(99999).is_none());
    }

    #[test]
    fn mime_and_extension() {
        assert_eq!(MediaFormat::M4A.mime(), "audio/mp4");
        assert_eq!(MediaFormat::M4A.extension(), "m4a");
        assert_eq!(MediaFormat::Webm.mime(), "video/webm");
    }
}
|
||||
|
|
@ -1,5 +1,11 @@
|
|||
// YouTube service tree. Phase 2 lands the JS deobfuscator (the keystone
|
||||
// risk per SPEC §9). Phase 3+ lands the InnerTube client matrix, itag
|
||||
// table, stream extractor, search, channel, etc.
|
||||
// YouTube service tree. Phase 2 landed the JS deobfuscator. Phase 3 adds
|
||||
// the InnerTube client matrix, request envelope, parsing helpers, and the
|
||||
// itag table. Phase 4+ will add the stream extractor, search, channel,
|
||||
// playlist, kiosks.
|
||||
|
||||
pub mod client_request;
|
||||
pub mod constants;
|
||||
pub mod itag;
|
||||
pub mod js;
|
||||
pub mod parsing;
|
||||
|
||||
|
|
|
|||
232
src/youtube/parsing.rs
Normal file
232
src/youtube/parsing.rs
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
// YoutubeParsingHelper-shaped helpers — mirrors NPE
|
||||
// services/youtube/YoutubeParsingHelper.java.
|
||||
//
|
||||
// Currently implements:
|
||||
// * consent toggle + cookie generator (set_consent_accepted, consent_cookie)
|
||||
// * client-version cache + sw.js fetch fallback (get_web_client_version)
|
||||
// * visitor-data bootstrap via /youtubei/v1/visitor_id
|
||||
// * client/origin/referer header builder
|
||||
//
|
||||
// PoToken integration lands in Phase 5. po_token / DroidGuard / BotGuard
|
||||
// machinery is host-provided (PoTokenProvider trait).
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use parking_lot::RwLock;
|
||||
use regex::Regex;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::downloader::request::Request;
|
||||
use crate::downloader::Downloader;
|
||||
use crate::exceptions::ParsingError;
|
||||
use crate::localization::{ContentCountry, Localization};
|
||||
use crate::newpipe::NewPipe;
|
||||
use crate::youtube::client_request::{
|
||||
build_envelope, InnertubeClientRequestInfo,
|
||||
};
|
||||
use crate::youtube::constants::*;
|
||||
|
||||
static CONSENT_ACCEPTED: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
|
||||
static CACHED_WEB_CLIENT_VERSION: Lazy<RwLock<Option<String>>> = Lazy::new(|| RwLock::new(None));
|
||||
|
||||
pub fn set_consent_accepted(accepted: bool) {
|
||||
*CONSENT_ACCEPTED.write() = accepted;
|
||||
}
|
||||
|
||||
pub fn is_consent_accepted() -> bool {
|
||||
*CONSENT_ACCEPTED.read()
|
||||
}
|
||||
|
||||
/// Returns the `SOCS=` consent cookie value. EU users need
|
||||
/// `CAISAiAD` (accepted) to extract mix-playlist continuations.
|
||||
pub fn consent_cookie() -> &'static str {
|
||||
if is_consent_accepted() {
|
||||
"SOCS=CAISAiAD"
|
||||
} else {
|
||||
"SOCS=CAE="
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the cached WEB client version. Falls back to the hardcoded
|
||||
/// constant if no live extraction has run.
|
||||
pub fn web_client_version() -> String {
|
||||
if let Some(v) = CACHED_WEB_CLIENT_VERSION.read().as_ref() {
|
||||
return v.clone();
|
||||
}
|
||||
WEB_HARDCODED_CLIENT_VERSION.to_string()
|
||||
}
|
||||
|
||||
pub fn reset_web_client_version_cache() {
|
||||
*CACHED_WEB_CLIENT_VERSION.write() = None;
|
||||
}
|
||||
|
||||
static SW_JS_VERSION_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"INNERTUBE_CONTEXT_CLIENT_VERSION":\s*"([^"]+)""#).unwrap()
|
||||
});
|
||||
|
||||
/// Fetches sw.js + extracts the live WEB client version. Caches the
|
||||
/// result. Returns the cached value if already known.
|
||||
pub fn discover_web_client_version() -> Result<String, ParsingError> {
|
||||
if let Some(v) = CACHED_WEB_CLIENT_VERSION.read().as_ref() {
|
||||
return Ok(v.clone());
|
||||
}
|
||||
let downloader = NewPipe::downloader()
|
||||
.ok_or_else(|| ParsingError::Invalid("downloader not initialized".into()))?;
|
||||
let req = Request::get("https://www.youtube.com/sw.js")
|
||||
.add_header("Origin", "https://www.youtube.com")
|
||||
.add_header("Referer", "https://www.youtube.com")
|
||||
.build();
|
||||
let resp = downloader
|
||||
.execute(req)
|
||||
.map_err(|e| ParsingError::Invalid(format!("sw.js fetch: {e}")))?;
|
||||
if resp.response_code() != 200 {
|
||||
return Err(ParsingError::Invalid(format!(
|
||||
"sw.js HTTP {}",
|
||||
resp.response_code()
|
||||
)));
|
||||
}
|
||||
let version = SW_JS_VERSION_RE
|
||||
.captures(resp.response_body())
|
||||
.and_then(|c| c.get(1))
|
||||
.map(|m| m.as_str().to_string())
|
||||
.ok_or_else(|| ParsingError::RegexMiss("INNERTUBE_CONTEXT_CLIENT_VERSION".into()))?;
|
||||
*CACHED_WEB_CLIENT_VERSION.write() = Some(version.clone());
|
||||
Ok(version)
|
||||
}
|
||||
|
||||
/// Headers for a WEB-flavor POST (JSON content-type, client headers,
|
||||
/// origin/referer, consent cookie).
|
||||
pub fn youtube_post_headers() -> Vec<(String, String)> {
|
||||
vec![
|
||||
("Content-Type".into(), "application/json".into()),
|
||||
("X-YouTube-Client-Name".into(), WEB_CLIENT_ID.into()),
|
||||
("X-YouTube-Client-Version".into(), web_client_version()),
|
||||
("Origin".into(), "https://www.youtube.com".into()),
|
||||
("Referer".into(), "https://www.youtube.com".into()),
|
||||
("Cookie".into(), consent_cookie().into()),
|
||||
]
|
||||
}
|
||||
|
||||
/// Mobile (Android/iOS) POST headers — UA + format-version only. No
/// X-YouTube-Client-Name, no Origin/Referer, no Cookie (audit Track A §6.2).
///
/// Returns exactly three pairs, in this order: `Content-Type`,
/// `User-Agent` (the given `user_agent`, unmodified) and
/// `X-Goog-Api-Format-Version`.
pub fn mobile_post_headers(user_agent: &str) -> Vec<(String, String)> {
    vec![
        ("Content-Type".into(), "application/json".into()),
        ("User-Agent".into(), user_agent.into()),
        ("X-Goog-Api-Format-Version".into(), "2".into()),
    ]
}
|
||||
|
||||
pub fn android_user_agent(country: &ContentCountry) -> String {
|
||||
format!(
|
||||
"com.google.android.youtube/{ANDROID_CLIENT_VERSION} (Linux; U; Android 15; {}) gzip",
|
||||
country.country_code()
|
||||
)
|
||||
}
|
||||
|
||||
pub fn ios_user_agent(country: &ContentCountry) -> String {
|
||||
format!(
|
||||
"com.google.ios.youtube/{IOS_CLIENT_VERSION}({IOS_DEVICE_MODEL}; U; CPU iOS {IOS_USER_AGENT_VERSION} like Mac OS X; {})",
|
||||
country.country_code()
|
||||
)
|
||||
}
|
||||
|
||||
/// Bootstraps a visitor_data token via `/youtubei/v1/visitor_id`. Returns
|
||||
/// the value of `responseContext.visitorData` from the response.
|
||||
pub fn bootstrap_visitor_data(
|
||||
info: &InnertubeClientRequestInfo,
|
||||
localization: &Localization,
|
||||
content_country: &ContentCountry,
|
||||
use_gapis_endpoint: bool,
|
||||
) -> Result<String, ParsingError> {
|
||||
let downloader = NewPipe::downloader()
|
||||
.ok_or_else(|| ParsingError::Invalid("downloader not initialized".into()))?;
|
||||
let envelope = build_envelope(info, localization, content_country, None);
|
||||
let body = serde_json::to_vec(&envelope)?;
|
||||
|
||||
let base = if use_gapis_endpoint {
|
||||
YOUTUBEI_V1_GAPIS_URL
|
||||
} else {
|
||||
YOUTUBEI_V1_URL
|
||||
};
|
||||
let url = format!("{base}visitor_id{DISABLE_PRETTY_PRINT_PARAM}");
|
||||
|
||||
let mut req_builder = Request::post(&url, body);
|
||||
for (k, v) in youtube_post_headers() {
|
||||
req_builder = req_builder.add_header(&k, &v);
|
||||
}
|
||||
let resp = downloader
|
||||
.execute(req_builder.build())
|
||||
.map_err(|e| ParsingError::Invalid(format!("visitor_id POST: {e}")))?;
|
||||
if resp.response_code() != 200 {
|
||||
return Err(ParsingError::Invalid(format!(
|
||||
"visitor_id HTTP {}",
|
||||
resp.response_code()
|
||||
)));
|
||||
}
|
||||
let parsed: Value = serde_json::from_str(resp.response_body())?;
|
||||
parsed
|
||||
.get("responseContext")
|
||||
.and_then(|rc| rc.get("visitorData"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string())
|
||||
.ok_or_else(|| ParsingError::MissingField("responseContext.visitorData".into()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn consent_toggle_flips_cookie() {
        set_consent_accepted(false);
        assert_eq!(consent_cookie(), "SOCS=CAE=");
        set_consent_accepted(true);
        assert_eq!(consent_cookie(), "SOCS=CAISAiAD");
        set_consent_accepted(false); // reset for other tests
    }

    #[test]
    fn web_client_version_falls_back_to_hardcoded() {
        reset_web_client_version_cache();
        assert_eq!(web_client_version(), WEB_HARDCODED_CLIENT_VERSION);
    }

    #[test]
    fn mobile_headers_omit_client_name_and_referer() {
        let h = mobile_post_headers("ua/1.0");
        let keys: Vec<&str> = h.iter().map(|(k, _)| k.as_str()).collect();
        assert!(keys.contains(&"User-Agent"));
        assert!(keys.contains(&"X-Goog-Api-Format-Version"));
        assert!(!keys.contains(&"X-YouTube-Client-Name"));
        assert!(!keys.contains(&"Origin"));
        assert!(!keys.contains(&"Referer"));
        assert!(!keys.contains(&"Cookie"));
    }

    #[test]
    fn web_headers_include_consent_and_client_id() {
        let h = youtube_post_headers();
        let keys: Vec<&str> = h.iter().map(|(k, _)| k.as_str()).collect();
        assert!(keys.contains(&"X-YouTube-Client-Name"));
        assert!(keys.contains(&"Origin"));
        assert!(keys.contains(&"Cookie"));
    }

    #[test]
    fn android_ua_template() {
        let ua = android_user_agent(&ContentCountry::new("DE"));
        assert!(ua.contains("com.google.android.youtube/21.03.36"));
        assert!(ua.contains("Android 15"));
        assert!(ua.contains("; DE)"));
        assert!(ua.ends_with(" gzip"));
    }

    #[test]
    fn ios_ua_template() {
        let ua = ios_user_agent(&ContentCountry::new("US"));
        assert!(ua.contains("com.google.ios.youtube/21.03.2"));
        assert!(ua.contains("iPhone16,2"));
        assert!(ua.contains("CPU iOS 18_7_2"));
        assert!(ua.contains("; US)"));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue