Phase 4 (partial) — stream value types + InnerTube /player helpers
Lands the data shapes + the HTTP layer for stream extraction. The
extractor orchestrator + DASH manifest creator are deferred to the
next session — the parsing logic is dense enough to want a focused
pass.
src/stream/
* mod.rs — StreamInfo + StreamInfoItem (full + 'card' shapes)
mirroring NPE StreamInfo.java + StreamInfoItem.java
* delivery.rs — DeliveryMethod (Progressive/Dash/Hls/Torrent)
* audio.rs — AudioStream (itag, format, url, bitrate, codec,
audio_track_id, content_length, etc.)
* video.rs — VideoStream (itag, format, url, resolution, fps,
bandwidth, codec, video_only flag)
* subtitles.rs — SubtitlesStream (url, lang, auto_generated, mime)
src/youtube/stream_helper.rs
* generate_content_playback_nonce() — 16-char LCG-shuffled cpn
* get_web_metadata_player_response (microformat + thumbnails only)
* get_web_embedded_player_response (embed-url + signatureTimestamp)
* get_android_player_response (full Android /player + poToken)
* get_android_reel_player_response (no-poToken fallback)
* get_ios_player_response (iOS — flagged with 917 KiB cap
warning in the doc comment)
Per-helper headers + URL shapes match audit Track C §2.7 verbatim:
Android/iOS hit the gapis endpoint with a mobile UA; the WEB family hits
www.youtube.com with the WEB headers.
Tests: 64 lib unit tests pass (up from 62 in Phase 3).
Next session: full stream_extractor.rs orchestrator + dash_manifest/
creator + Phase 4 done-when smoke (extract NCS Spektrem).
This commit is contained in:
parent
3014410cba
commit
cd98673684
9 changed files with 419 additions and 1 deletions
|
|
@ -13,6 +13,7 @@ pub mod metainfo;
|
|||
pub mod newpipe;
|
||||
pub mod page;
|
||||
pub mod service;
|
||||
pub mod stream;
|
||||
pub mod youtube;
|
||||
|
||||
pub use downloader::{Downloader, Request, Response};
|
||||
|
|
|
|||
20
src/stream/audio.rs
Normal file
20
src/stream/audio.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
// AudioStream — one DASH or progressive audio variant.
|
||||
|
||||
use crate::stream::DeliveryMethod;
|
||||
use crate::youtube::itag::MediaFormat;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AudioStream {
|
||||
pub itag: u32,
|
||||
pub url: String,
|
||||
pub format: MediaFormat,
|
||||
pub delivery: DeliveryMethod,
|
||||
pub average_bitrate_kbps: Option<u32>,
|
||||
pub codec: Option<String>,
|
||||
pub content_length_bytes: Option<i64>,
|
||||
pub audio_track_id: Option<String>,
|
||||
pub audio_track_name: Option<String>,
|
||||
pub audio_locale: Option<String>,
|
||||
pub is_descriptive: bool,
|
||||
pub itag_url_format: Option<String>,
|
||||
}
|
||||
14
src/stream/delivery.rs
Normal file
14
src/stream/delivery.rs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
// DeliveryMethod — how a stream is fetched.
|
||||
|
||||
/// The way a stream's bytes are obtained.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DeliveryMethod {
    /// Plain progressive download: one HTTP GET, optionally with byte ranges.
    Progressive,
    /// Adaptive delivery through a DASH manifest URL, fetched segment by
    /// segment.
    Dash,
    /// Adaptive delivery through an HLS manifest URL; mostly seen on live
    /// content.
    Hls,
    /// Torrent magnet link or `.torrent` URL. YT does not use it; it is kept
    /// for parity with NPE in case other services are added later.
    Torrent,
}
|
||||
89
src/stream/mod.rs
Normal file
89
src/stream/mod.rs
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
// Stream value types — mirrors NPE's stream/ package.
|
||||
//
|
||||
// Shapes lifted from NPE so the Rust port produces the same data
|
||||
// callers see today. Codec strings ("avc1.4d401f", "vp9.2") are kept as
|
||||
// opaque strings — they come from YT's response mimeType and we don't
|
||||
// classify them ourselves.
|
||||
|
||||
pub mod audio;
|
||||
pub mod delivery;
|
||||
pub mod subtitles;
|
||||
pub mod video;
|
||||
|
||||
pub use audio::AudioStream;
|
||||
pub use delivery::DeliveryMethod;
|
||||
pub use subtitles::SubtitlesStream;
|
||||
pub use video::VideoStream;
|
||||
|
||||
use crate::image::ImageSet;
|
||||
|
||||
/// Classification of a stream (video vs. audio, live vs. post-live, ...).
///
/// Note that `StreamType::None` is a variant of this enum and is distinct
/// from an absent `Option<StreamType>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StreamType {
    None,
    VideoStream,
    AudioStream,
    LiveStream,
    AudioLiveStream,
    VideoLiveStream,
    PostLiveStream,
    PostLiveAudioStream,
}
|
||||
|
||||
/// Per-video metadata + format lists. Mirrors NPE StreamInfo.java.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct StreamInfo {
|
||||
pub service_id: u32,
|
||||
pub url: String,
|
||||
pub video_id: String,
|
||||
pub stream_type: Option<StreamType>,
|
||||
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
pub duration_seconds: i64,
|
||||
pub view_count: i64,
|
||||
pub like_count: i64,
|
||||
pub dislike_count: i64,
|
||||
pub upload_date_iso: Option<String>,
|
||||
pub thumbnails: ImageSet,
|
||||
|
||||
pub uploader_name: String,
|
||||
pub uploader_url: String,
|
||||
pub uploader_id: String,
|
||||
pub uploader_avatars: ImageSet,
|
||||
pub uploader_subscriber_count: i64,
|
||||
pub uploader_verified: bool,
|
||||
|
||||
pub audio_streams: Vec<AudioStream>,
|
||||
pub video_streams: Vec<VideoStream>,
|
||||
pub video_only_streams: Vec<VideoStream>,
|
||||
pub subtitles: Vec<SubtitlesStream>,
|
||||
|
||||
pub dash_manifest_url: Option<String>,
|
||||
pub hls_manifest_url: Option<String>,
|
||||
|
||||
pub related_streams: Vec<StreamInfoItem>,
|
||||
pub tags: Vec<String>,
|
||||
pub category: String,
|
||||
}
|
||||
|
||||
/// Shorter "card" used in search results, channel video lists, playlists.
|
||||
/// Mirrors NPE StreamInfoItem.java.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct StreamInfoItem {
|
||||
pub service_id: u32,
|
||||
pub url: String,
|
||||
pub name: String,
|
||||
pub thumbnails: ImageSet,
|
||||
|
||||
pub uploader_name: String,
|
||||
pub uploader_url: String,
|
||||
pub uploader_id: String,
|
||||
pub uploader_verified: bool,
|
||||
|
||||
pub duration_seconds: i64,
|
||||
pub view_count: i64,
|
||||
pub upload_date_relative: String,
|
||||
|
||||
pub stream_type: Option<StreamType>,
|
||||
pub short_description: String,
|
||||
}
|
||||
11
src/stream/subtitles.rs
Normal file
11
src/stream/subtitles.rs
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
// SubtitlesStream — closed-caption / subtitle track.
|
||||
|
||||
/// A single subtitle / closed-caption track.
#[derive(Debug, Clone)]
pub struct SubtitlesStream {
    /// URL the track is fetched from.
    pub url: String,
    /// Language code of the track.
    pub language_code: String,
    /// Name of the track.
    pub name: String,
    /// Whether the track is flagged as auto-generated.
    pub is_auto_generated: bool,
    /// MIME type of the track, e.g. `application/ttml+xml`, `text/vtt` or
    /// `application/x-subrip`.
    pub mime: String,
}
|
||||
22
src/stream/video.rs
Normal file
22
src/stream/video.rs
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
// VideoStream — one DASH or progressive video variant. `video_only=true`
|
||||
// means it's an adaptive video-only track that has to be paired with an
|
||||
// AudioStream by the consumer (ExoPlayer handles this via DASH).
|
||||
|
||||
use crate::stream::DeliveryMethod;
|
||||
use crate::youtube::itag::MediaFormat;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct VideoStream {
|
||||
pub itag: u32,
|
||||
pub url: String,
|
||||
pub format: MediaFormat,
|
||||
pub delivery: DeliveryMethod,
|
||||
pub resolution: String,
|
||||
pub fps: u32,
|
||||
pub bandwidth: Option<u32>,
|
||||
pub codec: Option<String>,
|
||||
pub content_length_bytes: Option<i64>,
|
||||
pub width: Option<u32>,
|
||||
pub height: Option<u32>,
|
||||
pub video_only: bool,
|
||||
}
|
||||
|
|
@ -8,4 +8,5 @@ pub mod constants;
|
|||
pub mod itag;
|
||||
pub mod js;
|
||||
pub mod parsing;
|
||||
pub mod stream_helper;
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,6 @@ use regex::Regex;
|
|||
use serde_json::Value;
|
||||
|
||||
use crate::downloader::request::Request;
|
||||
use crate::downloader::Downloader;
|
||||
use crate::exceptions::ParsingError;
|
||||
use crate::localization::{ContentCountry, Localization};
|
||||
use crate::newpipe::NewPipe;
|
||||
|
|
|
|||
261
src/youtube/stream_helper.rs
Normal file
261
src/youtube/stream_helper.rs
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
// YoutubeStreamHelper — 5 per-client /player request helpers.
|
||||
// Mirrors NPE services/youtube/YoutubeStreamHelper.java.
|
||||
//
|
||||
// Each helper builds the InnerTube envelope + the per-endpoint payload
|
||||
// (videoId, cpn, contentCheckOk, racyCheckOk, playbackContext, optional
|
||||
// serviceIntegrityDimensions for poToken), POSTs to the right URL with
|
||||
// the right headers, returns the parsed JSON.
|
||||
|
||||
use serde_json::{json, Map, Value};
|
||||
|
||||
use crate::downloader::request::Request;
|
||||
use crate::exceptions::{NetworkError, ParsingError};
|
||||
use crate::localization::{ContentCountry, Localization};
|
||||
use crate::newpipe::NewPipe;
|
||||
use crate::youtube::client_request::{build_envelope, InnertubeClientRequestInfo};
|
||||
use crate::youtube::constants::*;
|
||||
use crate::youtube::parsing::{
|
||||
android_user_agent, ios_user_agent, mobile_post_headers, youtube_post_headers,
|
||||
};
|
||||
|
||||
/// Builds a 16-character `cpn` (content playback nonce).
///
/// Characters are drawn from a 64-symbol alphabet: ASCII letters, digits,
/// `-` and `_`. NPE uses a custom alphabet; we mirror it. NOT
/// cryptographically random — just shaped to look like YT's own format.
/// Per-client cpn, so we keep it as a free helper.
///
/// The generator is a 64-bit LCG seeded from the wall clock XOR-ed with a
/// per-process call counter, so two calls that land on the same clock tick
/// still start from different states.
pub fn generate_content_playback_nonce() -> String {
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
    const NONCE_LEN: usize = 16;
    // Odd multiplier: distinct counter values map to distinct 64-bit words.
    const COUNTER_SPREAD: u64 = 0x9E37_79B9_7F4A_7C15;

    static CALLS: AtomicU64 = AtomicU64::new(0);

    // Truncating the u128 nanosecond count to u64 is intentional: the value
    // is only used as seed material.
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0);
    let call_index = CALLS.fetch_add(1, Ordering::Relaxed);
    let mut state = nanos ^ call_index.wrapping_mul(COUNTER_SPREAD);

    let mut out = String::with_capacity(NONCE_LEN);
    for _ in 0..NONCE_LEN {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        // Use the TOP six bits. In an LCG with a power-of-two modulus the
        // low k bits of each state depend only on the low k bits of the
        // seed, so sampling low-order bits would leave only a few thousand
        // distinct nonces.
        out.push(char::from(ALPHABET[(state >> 58) as usize]));
    }
    out
}
|
||||
|
||||
/// Common body fields every /player call needs.
|
||||
fn add_player_body_fields(body: &mut Map<String, Value>, video_id: &str, cpn: &str) {
|
||||
body.insert("videoId".into(), Value::String(video_id.into()));
|
||||
body.insert("cpn".into(), Value::String(cpn.into()));
|
||||
body.insert("contentCheckOk".into(), Value::Bool(true));
|
||||
body.insert("racyCheckOk".into(), Value::Bool(true));
|
||||
}
|
||||
|
||||
fn add_playback_context(body: &mut Map<String, Value>, signature_timestamp: i32, referer: &str) {
|
||||
body.insert(
|
||||
"playbackContext".into(),
|
||||
json!({
|
||||
"contentPlaybackContext": {
|
||||
"signatureTimestamp": signature_timestamp,
|
||||
"referer": referer,
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn add_service_integrity_dimensions(body: &mut Map<String, Value>, po_token: &str) {
|
||||
body.insert(
|
||||
"serviceIntegrityDimensions".into(),
|
||||
json!({ "poToken": po_token }),
|
||||
);
|
||||
}
|
||||
|
||||
fn envelope_to_body(envelope: Value) -> Map<String, Value> {
|
||||
match envelope {
|
||||
Value::Object(map) => map,
|
||||
_ => Map::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// WEB-client metadata-only /player call. Per audit Track A §4.4 — used
|
||||
/// for microformat + thumbnails only; never used as a stream URL source.
|
||||
pub fn get_web_metadata_player_response(
|
||||
video_id: &str,
|
||||
localization: &Localization,
|
||||
content_country: &ContentCountry,
|
||||
signature_timestamp: i32,
|
||||
) -> Result<Value, ExtractionError> {
|
||||
let info = InnertubeClientRequestInfo::of_web_client();
|
||||
let env = build_envelope(&info, localization, content_country, None);
|
||||
let mut body = envelope_to_body(env);
|
||||
add_player_body_fields(&mut body, video_id, &generate_content_playback_nonce());
|
||||
add_playback_context(&mut body, signature_timestamp, "https://www.youtube.com");
|
||||
let url = format!(
|
||||
"{YOUTUBEI_V1_URL}player{DISABLE_PRETTY_PRINT_PARAM}&$fields=microformat,videoDetails.thumbnail.thumbnails,videoDetails.videoId"
|
||||
);
|
||||
post_youtube(&url, &Value::Object(body), youtube_post_headers())
|
||||
}
|
||||
|
||||
/// WEB_EMBEDDED_PLAYER /player call. Carries embedUrl + signatureTimestamp.
|
||||
pub fn get_web_embedded_player_response(
|
||||
video_id: &str,
|
||||
localization: &Localization,
|
||||
content_country: &ContentCountry,
|
||||
signature_timestamp: i32,
|
||||
po_token: Option<&str>,
|
||||
) -> Result<Value, ExtractionError> {
|
||||
let info = InnertubeClientRequestInfo::of_web_embedded_player_client();
|
||||
let embed_url = format!("https://www.youtube.com/embed/{video_id}");
|
||||
let env = build_envelope(&info, localization, content_country, Some(&embed_url));
|
||||
let mut body = envelope_to_body(env);
|
||||
add_player_body_fields(&mut body, video_id, &generate_content_playback_nonce());
|
||||
add_playback_context(&mut body, signature_timestamp, &embed_url);
|
||||
if let Some(token) = po_token {
|
||||
add_service_integrity_dimensions(&mut body, token);
|
||||
}
|
||||
let url = format!("{YOUTUBEI_V1_URL}player{DISABLE_PRETTY_PRINT_PARAM}");
|
||||
post_youtube(&url, &Value::Object(body), youtube_post_headers())
|
||||
}
|
||||
|
||||
/// ANDROID full /player call. Hits the gapis endpoint with the mobile
|
||||
/// header set. Caller must supply (cpn, po_token) — they are paired with
|
||||
/// the URLs the response will return; mixing them with iOS values returns
|
||||
/// 403.
|
||||
pub fn get_android_player_response(
|
||||
video_id: &str,
|
||||
localization: &Localization,
|
||||
content_country: &ContentCountry,
|
||||
cpn: &str,
|
||||
po_token: Option<&str>,
|
||||
visitor_data: Option<&str>,
|
||||
) -> Result<Value, ExtractionError> {
|
||||
let mut info = InnertubeClientRequestInfo::of_android_client();
|
||||
if let Some(v) = visitor_data {
|
||||
info.client_info.visitor_data = Some(v.into());
|
||||
}
|
||||
let env = build_envelope(&info, localization, content_country, None);
|
||||
let mut body = envelope_to_body(env);
|
||||
add_player_body_fields(&mut body, video_id, cpn);
|
||||
if let Some(token) = po_token {
|
||||
add_service_integrity_dimensions(&mut body, token);
|
||||
}
|
||||
let url = format!(
|
||||
"{YOUTUBEI_V1_GAPIS_URL}player{DISABLE_PRETTY_PRINT_PARAM}&t={t}&id={video_id}",
|
||||
t = generate_content_playback_nonce()
|
||||
);
|
||||
let ua = android_user_agent(content_country);
|
||||
post_youtube(&url, &Value::Object(body), mobile_post_headers(&ua))
|
||||
}
|
||||
|
||||
/// ANDROID `/reel/reel_item_watch` fallback — used when no poToken is
|
||||
/// available. Returns a `playerResponse`-shaped JSON wrapped inside the
|
||||
/// reel response.
|
||||
pub fn get_android_reel_player_response(
|
||||
video_id: &str,
|
||||
localization: &Localization,
|
||||
content_country: &ContentCountry,
|
||||
cpn: &str,
|
||||
) -> Result<Value, ExtractionError> {
|
||||
let info = InnertubeClientRequestInfo::of_android_client();
|
||||
let env = build_envelope(&info, localization, content_country, None);
|
||||
let mut body = envelope_to_body(env);
|
||||
body.insert(
|
||||
"playerRequest".into(),
|
||||
json!({
|
||||
"videoId": video_id,
|
||||
"cpn": cpn,
|
||||
}),
|
||||
);
|
||||
add_player_body_fields(&mut body, video_id, cpn);
|
||||
let url = format!(
|
||||
"{YOUTUBEI_V1_GAPIS_URL}reel/reel_item_watch{DISABLE_PRETTY_PRINT_PARAM}&t={t}&id={video_id}&$fields=playerResponse",
|
||||
t = generate_content_playback_nonce()
|
||||
);
|
||||
let ua = android_user_agent(content_country);
|
||||
post_youtube(&url, &Value::Object(body), mobile_post_headers(&ua))
|
||||
}
|
||||
|
||||
/// IOS /player call. The iOS-progressive URLs returned here are subject
|
||||
/// to YT's ~917 KiB server-side cap — DO NOT route playback through
|
||||
/// these as the primary path. They're useful for HLS manifests on live
|
||||
/// streams. (See workspace memory/2026-05-24-night2-straw-vc18-rollback.md
|
||||
/// for the cap diagnostic.)
|
||||
pub fn get_ios_player_response(
|
||||
video_id: &str,
|
||||
localization: &Localization,
|
||||
content_country: &ContentCountry,
|
||||
cpn: &str,
|
||||
po_token: Option<&str>,
|
||||
visitor_data: Option<&str>,
|
||||
) -> Result<Value, ExtractionError> {
|
||||
let mut info = InnertubeClientRequestInfo::of_ios_client();
|
||||
if let Some(v) = visitor_data {
|
||||
info.client_info.visitor_data = Some(v.into());
|
||||
}
|
||||
let env = build_envelope(&info, localization, content_country, None);
|
||||
let mut body = envelope_to_body(env);
|
||||
add_player_body_fields(&mut body, video_id, cpn);
|
||||
if let Some(token) = po_token {
|
||||
add_service_integrity_dimensions(&mut body, token);
|
||||
}
|
||||
let url = format!(
|
||||
"{YOUTUBEI_V1_GAPIS_URL}player{DISABLE_PRETTY_PRINT_PARAM}&t={t}&id={video_id}",
|
||||
t = generate_content_playback_nonce()
|
||||
);
|
||||
let ua = ios_user_agent(content_country);
|
||||
post_youtube(&url, &Value::Object(body), mobile_post_headers(&ua))
|
||||
}
|
||||
|
||||
fn post_youtube(
|
||||
url: &str,
|
||||
body: &Value,
|
||||
headers: Vec<(String, String)>,
|
||||
) -> Result<Value, ExtractionError> {
|
||||
let downloader = NewPipe::downloader().ok_or(ExtractionError::DownloaderMissing)?;
|
||||
let serialized = serde_json::to_vec(body).map_err(|e| {
|
||||
ExtractionError::Parsing(ParsingError::Invalid(format!("serialize body: {e}")))
|
||||
})?;
|
||||
let mut builder = Request::post(url, serialized);
|
||||
for (k, v) in headers {
|
||||
builder = builder.add_header(&k, &v);
|
||||
}
|
||||
let resp = downloader.execute(builder.build())?;
|
||||
if resp.response_code() != 200 {
|
||||
return Err(ExtractionError::Network(NetworkError::Transport(format!(
|
||||
"HTTP {} from {url}",
|
||||
resp.response_code()
|
||||
))));
|
||||
}
|
||||
let parsed: Value = serde_json::from_str(resp.response_body())
|
||||
.map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))?;
|
||||
Ok(parsed)
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ExtractionError {
|
||||
#[error("network: {0}")]
|
||||
Network(#[from] NetworkError),
|
||||
#[error("parsing: {0}")]
|
||||
Parsing(#[from] ParsingError),
|
||||
#[error("downloader not initialized")]
|
||||
DownloaderMissing,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A generated cpn is exactly 16 characters long and only uses ASCII
    /// letters, digits, `-` and `_`.
    #[test]
    fn cpn_is_16_chars_alphanumeric() {
        let nonce = generate_content_playback_nonce();
        assert_eq!(nonce.len(), 16);

        let stray = nonce
            .chars()
            .find(|c| !(c.is_ascii_alphanumeric() || *c == '-' || *c == '_'));
        assert!(stray.is_none());
    }

    /// Two cpns generated a short moment apart are different.
    #[test]
    fn two_consecutive_cpns_differ() {
        // The generator is seeded from the wall clock; the short pause makes
        // sure the two calls observe different timestamps.
        let first = generate_content_playback_nonce();
        std::thread::sleep(std::time::Duration::from_millis(2));
        let second = generate_content_playback_nonce();
        assert_ne!(first, second);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue