Phase 4 (partial) — stream value types + InnerTube /player helpers

Lands the data shapes + the HTTP layer for stream extraction. The
extractor orchestrator + DASH manifest creator are deferred to the
next session — the parsing logic is dense enough to want a focused
pass.

src/stream/
  * mod.rs       — StreamInfo + StreamInfoItem (full + 'card' shapes)
                   mirroring NPE StreamInfo.java + StreamInfoItem.java
  * delivery.rs  — DeliveryMethod (Progressive/Dash/Hls/Torrent)
  * audio.rs     — AudioStream (itag, format, url, bitrate, codec,
                   audio_track_id, content_length, etc.)
  * video.rs     — VideoStream (itag, format, url, resolution, fps,
                   bandwidth, codec, video_only flag)
  * subtitles.rs — SubtitlesStream (url, lang, auto_generated, mime)

src/youtube/stream_helper.rs
  * generate_content_playback_nonce() — 16-char LCG-shuffled cpn
  * get_web_metadata_player_response       (microformat + thumbnails only)
  * get_web_embedded_player_response       (embed-url + signatureTimestamp)
  * get_android_player_response            (full Android /player + poToken)
  * get_android_reel_player_response       (no-poToken fallback)
  * get_ios_player_response                (iOS — flagged with 917 KiB cap
                                            warning in the doc comment)

Per-helper headers + URL shapes match audit Track C §2.7 verbatim:
Android/iOS hit gapis endpoint with mobile UA; WEB family hits
www.youtube.com with the WEB headers.

Tests: 64 lib unit pass (up from 62 in Phase 3).

Next session: full stream_extractor.rs orchestrator + dash_manifest/
creator + Phase 4 done-when smoke (extract NCS Spektrem).
This commit is contained in:
Kayos 2026-05-24 17:01:03 -07:00
parent 3014410cba
commit cd98673684
9 changed files with 419 additions and 1 deletions

View file

@ -13,6 +13,7 @@ pub mod metainfo;
pub mod newpipe;
pub mod page;
pub mod service;
pub mod stream;
pub mod youtube;
pub use downloader::{Downloader, Request, Response};

20
src/stream/audio.rs Normal file
View file

@ -0,0 +1,20 @@
// AudioStream — one DASH or progressive audio variant.
use crate::stream::DeliveryMethod;
use crate::youtube::itag::MediaFormat;
/// One audio variant of a stream: where to fetch it (`url`, `delivery`),
/// what it is (`itag`, `format`, `codec`) and optional audio-track metadata.
#[derive(Clone, Debug)]
pub struct AudioStream {
/// Itag number of this variant.
pub itag: u32,
/// URL the audio is fetched from.
pub url: String,
/// Media format of this variant, as classified by `youtube::itag`.
pub format: MediaFormat,
/// How `url` has to be consumed (progressive, DASH, HLS, ...).
pub delivery: DeliveryMethod,
/// Average bitrate in kbit/s, if known.
pub average_bitrate_kbps: Option<u32>,
/// Codec string, if known.
pub codec: Option<String>,
/// Content length in bytes, if known.
pub content_length_bytes: Option<i64>,
/// Identifier of the audio track this variant belongs to, if any.
pub audio_track_id: Option<String>,
/// Name of the audio track, if any.
pub audio_track_name: Option<String>,
/// Locale of the audio track, if any.
// NOTE(review): the string format (e.g. BCP-47 tag) is not visible here — confirm.
pub audio_locale: Option<String>,
/// Presumably marks an audio-description track — TODO confirm against the extractor.
pub is_descriptive: bool,
// NOTE(review): the meaning of this field is not evident from this file;
// document it once the extractor that populates it lands.
pub itag_url_format: Option<String>,
}

14
src/stream/delivery.rs Normal file
View file

@ -0,0 +1,14 @@
// DeliveryMethod — how a stream is fetched.
/// How a stream's media is fetched. Carried by the stream value types in
/// their `delivery` field so a consumer knows how to treat the stream URL.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum DeliveryMethod {
/// Direct progressive download (single HTTP GET, byte ranges).
Progressive,
/// DASH manifest URL — adaptive, segment-by-segment.
Dash,
/// HLS manifest URL — adaptive, mostly for live.
Hls,
/// Torrent magnet / .torrent URL. Not used by YT; included for parity
/// with NPE for other services we may add later.
Torrent,
}

89
src/stream/mod.rs Normal file
View file

@ -0,0 +1,89 @@
// Stream value types — mirrors NPE's stream/ package.
//
// Shapes lifted from NPE so the Rust port produces the same data
// callers see today. Codec strings ("avc1.4d401f", "vp9.2") are kept as
// opaque strings — they come from YT's response mimeType and we don't
// classify them ourselves.
pub mod audio;
pub mod delivery;
pub mod subtitles;
pub mod video;
pub use audio::AudioStream;
pub use delivery::DeliveryMethod;
pub use subtitles::SubtitlesStream;
pub use video::VideoStream;
use crate::image::ImageSet;
/// Classification of a stream. The variant names presumably follow NPE's
/// `StreamType` (on-demand vs. live vs. post-live, audio vs. video) —
/// TODO confirm the exact semantics of each variant against NPE.
// NOTE(review): `StreamInfo` and `StreamInfoItem` store this as
// `Option<StreamType>`, so "unknown" can be spelled either `Option::None`
// or `Some(StreamType::None)` — confirm which one callers should expect.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum StreamType {
None,
VideoStream,
AudioStream,
LiveStream,
AudioLiveStream,
VideoLiveStream,
PostLiveStream,
PostLiveAudioStream,
}
/// Per-video metadata + format lists. Mirrors NPE StreamInfo.java.
///
/// Derives `Default`, so a value can be created empty and filled in
/// field by field.
#[derive(Clone, Debug, Default)]
pub struct StreamInfo {
/// Numeric id of the service the stream belongs to.
pub service_id: u32,
/// URL of the stream.
pub url: String,
/// Video id of the stream.
pub video_id: String,
/// Kind of stream, when known.
pub stream_type: Option<StreamType>,
/// Title of the stream.
pub name: String,
/// Description text.
pub description: String,
/// Duration in seconds.
pub duration_seconds: i64,
// NOTE(review): the counters below are signed; presumably a negative
// value stands for "unknown" as in NPE — confirm before relying on it.
/// Number of views.
pub view_count: i64,
/// Number of likes.
pub like_count: i64,
/// Number of dislikes.
pub dislike_count: i64,
/// Upload date as an ISO-formatted string, if available.
// NOTE(review): exact format (date only vs. date-time) is not visible here.
pub upload_date_iso: Option<String>,
/// Thumbnail images of the stream.
pub thumbnails: ImageSet,
/// Display name of the uploader.
pub uploader_name: String,
/// URL of the uploader.
pub uploader_url: String,
/// Id of the uploader.
pub uploader_id: String,
/// Avatar images of the uploader.
pub uploader_avatars: ImageSet,
/// Subscriber count of the uploader.
pub uploader_subscriber_count: i64,
/// Whether the uploader is verified.
pub uploader_verified: bool,
/// Audio variants.
pub audio_streams: Vec<AudioStream>,
/// Video variants; presumably the ones that also carry audio, given the
/// separate `video_only_streams` list — TODO confirm.
pub video_streams: Vec<VideoStream>,
/// Video variants without an audio track.
pub video_only_streams: Vec<VideoStream>,
/// Subtitle / closed-caption tracks.
pub subtitles: Vec<SubtitlesStream>,
/// DASH manifest URL, if any.
pub dash_manifest_url: Option<String>,
/// HLS manifest URL, if any.
pub hls_manifest_url: Option<String>,
/// Related streams, in the short "card" shape.
pub related_streams: Vec<StreamInfoItem>,
/// Tags attached to the stream.
pub tags: Vec<String>,
/// Category of the stream.
pub category: String,
}
/// Shorter "card" used in search results, channel video lists, playlists.
/// Mirrors NPE StreamInfoItem.java.
///
/// Carries the listing-level subset of `StreamInfo`: no format lists, and
/// a relative upload date string instead of an ISO one.
#[derive(Clone, Debug, Default)]
pub struct StreamInfoItem {
/// Numeric id of the service the stream belongs to.
pub service_id: u32,
/// URL of the stream.
pub url: String,
/// Title of the stream.
pub name: String,
/// Thumbnail images of the stream.
pub thumbnails: ImageSet,
/// Display name of the uploader.
pub uploader_name: String,
/// URL of the uploader.
pub uploader_url: String,
/// Id of the uploader.
pub uploader_id: String,
/// Whether the uploader is verified.
pub uploader_verified: bool,
/// Duration in seconds.
pub duration_seconds: i64,
/// Number of views.
// NOTE(review): signed; presumably negative means "unknown" — confirm.
pub view_count: i64,
/// Upload date as relative text.
// NOTE(review): presumably the service's own wording ("3 days ago") — confirm.
pub upload_date_relative: String,
/// Kind of stream, when known.
pub stream_type: Option<StreamType>,
/// Short description snippet.
pub short_description: String,
}

11
src/stream/subtitles.rs Normal file
View file

@ -0,0 +1,11 @@
// SubtitlesStream — closed-caption / subtitle track.
/// One subtitle / closed-caption track of a stream.
#[derive(Clone, Debug)]
pub struct SubtitlesStream {
/// URL the subtitle file is fetched from.
pub url: String,
/// Language code of the track.
// NOTE(review): code format (ISO 639-1 vs. BCP-47) is not visible here — confirm.
pub language_code: String,
/// Name of the track.
pub name: String,
/// Whether the track was auto-generated rather than authored.
pub is_auto_generated: bool,
/// MIME type (`application/ttml+xml`, `text/vtt`, `application/x-subrip`, etc.)
pub mime: String,
}

22
src/stream/video.rs Normal file
View file

@ -0,0 +1,22 @@
// VideoStream — one DASH or progressive video variant. `video_only=true`
// means it's an adaptive video-only track that has to be paired with an
// AudioStream by the consumer (ExoPlayer handles this via DASH).
use crate::stream::DeliveryMethod;
use crate::youtube::itag::MediaFormat;
/// One video variant of a stream: where to fetch it (`url`, `delivery`),
/// what it is (`itag`, `format`, `codec`) and its picture properties.
#[derive(Clone, Debug)]
pub struct VideoStream {
/// Itag number of this variant.
pub itag: u32,
/// URL the video is fetched from.
pub url: String,
/// Media format of this variant, as classified by `youtube::itag`.
pub format: MediaFormat,
/// How `url` has to be consumed (progressive, DASH, HLS, ...).
pub delivery: DeliveryMethod,
/// Resolution label.
// NOTE(review): presumably a display string such as "1080p" — confirm.
pub resolution: String,
/// Frames per second.
pub fps: u32,
/// Bandwidth, if known.
// NOTE(review): unit (presumably bits per second, as in DASH) is not visible here.
pub bandwidth: Option<u32>,
/// Codec string, if known.
pub codec: Option<String>,
/// Content length in bytes, if known.
pub content_length_bytes: Option<i64>,
/// Frame width, if known (presumably pixels).
pub width: Option<u32>,
/// Frame height, if known (presumably pixels).
pub height: Option<u32>,
/// `true` for an adaptive video-only track that the consumer has to pair
/// with an `AudioStream`.
pub video_only: bool,
}

View file

@ -8,4 +8,5 @@ pub mod constants;
pub mod itag;
pub mod js;
pub mod parsing;
pub mod stream_helper;

View file

@ -16,7 +16,6 @@ use regex::Regex;
use serde_json::Value;
use crate::downloader::request::Request;
use crate::downloader::Downloader;
use crate::exceptions::ParsingError;
use crate::localization::{ContentCountry, Localization};
use crate::newpipe::NewPipe;

View file

@ -0,0 +1,261 @@
// YoutubeStreamHelper — 5 per-client /player request helpers.
// Mirrors NPE services/youtube/YoutubeStreamHelper.java.
//
// Each helper builds the InnerTube envelope + the per-endpoint payload
// (videoId, cpn, contentCheckOk, racyCheckOk, playbackContext, optional
// serviceIntegrityDimensions for poToken), POSTs to the right URL with
// the right headers, returns the parsed JSON.
use serde_json::{json, Map, Value};
use crate::downloader::request::Request;
use crate::exceptions::{NetworkError, ParsingError};
use crate::localization::{ContentCountry, Localization};
use crate::newpipe::NewPipe;
use crate::youtube::client_request::{build_envelope, InnertubeClientRequestInfo};
use crate::youtube::constants::*;
use crate::youtube::parsing::{
android_user_agent, ios_user_agent, mobile_post_headers, youtube_post_headers,
};
/// Builds a 16-character `cpn` (content playback nonce) drawn from the
/// URL-safe alphabet `A-Z`, `a-z`, `0-9`, `-`, `_`. NOT cryptographically
/// random — just shaped to look like YT's own format. Per-client cpn, so we
/// keep it as a free helper.
///
/// The generator is a 64-bit LCG. Its seed combines the wall clock with a
/// process-wide call counter, so two calls that observe the same clock
/// reading (coarse timers, or a clock set before the Unix epoch, which
/// falls back to `0`) still start from different seeds.
pub fn generate_content_playback_nonce() -> String {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    const ALPHABET: &[u8] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
    const NONCE_LEN: usize = 16;
    // Odd 64-bit constant that spreads consecutive counter values far apart
    // so they cannot be cancelled out by a small clock difference.
    const COUNTER_STRIDE: u64 = 0x9E37_79B9_7F4A_7C15;

    // Counts calls across the whole process; only uniqueness matters, so
    // relaxed ordering is sufficient.
    static CALLS: AtomicUsize = AtomicUsize::new(0);

    let clock = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // Truncating to 64 bits is intentional: this is only seed material.
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0);
    let call = CALLS.fetch_add(1, Ordering::Relaxed) as u64;
    let mut seed = clock.wrapping_add(call.wrapping_mul(COUNTER_STRIDE));

    let mut out = String::with_capacity(NONCE_LEN);
    for _ in 0..NONCE_LEN {
        seed = seed
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        // Index with the top six bits of the state. In a power-of-two-modulus
        // LCG the low bits of every state depend only on the low bits of the
        // seed, so selecting low bits would collapse the nonce space to a few
        // thousand values. The modulo keeps the index in bounds should the
        // alphabet ever change length.
        let idx = ((seed >> 58) as usize) % ALPHABET.len();
        out.push(ALPHABET[idx] as char);
    }
    out
}
/// Inserts the fields every /player request body carries: the video id,
/// the content playback nonce, and the two content-check acknowledgements
/// (both always `true`).
fn add_player_body_fields(body: &mut Map<String, Value>, video_id: &str, cpn: &str) {
    let fields = [
        ("videoId", Value::from(video_id)),
        ("cpn", Value::from(cpn)),
        ("contentCheckOk", Value::Bool(true)),
        ("racyCheckOk", Value::Bool(true)),
    ];
    for (key, value) in fields {
        body.insert(key.to_owned(), value);
    }
}
/// Inserts `playbackContext.contentPlaybackContext` carrying the player's
/// signature timestamp and the referer URL.
fn add_playback_context(body: &mut Map<String, Value>, signature_timestamp: i32, referer: &str) {
    let content_playback_context = json!({
        "signatureTimestamp": signature_timestamp,
        "referer": referer,
    });
    let playback_context = json!({ "contentPlaybackContext": content_playback_context });
    body.insert("playbackContext".to_owned(), playback_context);
}
fn add_service_integrity_dimensions(body: &mut Map<String, Value>, po_token: &str) {
body.insert(
"serviceIntegrityDimensions".into(),
json!({ "poToken": po_token }),
);
}
/// Unwraps an envelope into its top-level JSON object so further fields can
/// be inserted. Any non-object value yields an empty map.
fn envelope_to_body(envelope: Value) -> Map<String, Value> {
    if let Value::Object(fields) = envelope {
        fields
    } else {
        Map::new()
    }
}
/// Metadata-only /player request issued as the WEB client. Per audit
/// Track A §4.4 the response is used for microformat + thumbnails only and
/// never as a source of stream URLs; the `$fields` filter on the URL
/// restricts the response accordingly.
///
/// A fresh cpn is generated for the call. Returns the parsed JSON response,
/// or the error produced by `post_youtube`.
pub fn get_web_metadata_player_response(
    video_id: &str,
    localization: &Localization,
    content_country: &ContentCountry,
    signature_timestamp: i32,
) -> Result<Value, ExtractionError> {
    let client = InnertubeClientRequestInfo::of_web_client();
    let mut payload =
        envelope_to_body(build_envelope(&client, localization, content_country, None));

    let cpn = generate_content_playback_nonce();
    add_player_body_fields(&mut payload, video_id, &cpn);
    add_playback_context(&mut payload, signature_timestamp, "https://www.youtube.com");

    let endpoint = format!(
        "{YOUTUBEI_V1_URL}player{DISABLE_PRETTY_PRINT_PARAM}&$fields=microformat,videoDetails.thumbnail.thumbnails,videoDetails.videoId"
    );
    post_youtube(&endpoint, &Value::Object(payload), youtube_post_headers())
}
/// /player request issued as the WEB_EMBEDDED_PLAYER client.
///
/// The video's `/embed/` URL is passed to the envelope builder and reused
/// as the playback-context referer alongside `signature_timestamp`. A fresh
/// cpn is generated for the call; `po_token`, when given, is attached as
/// `serviceIntegrityDimensions`.
pub fn get_web_embedded_player_response(
    video_id: &str,
    localization: &Localization,
    content_country: &ContentCountry,
    signature_timestamp: i32,
    po_token: Option<&str>,
) -> Result<Value, ExtractionError> {
    let client = InnertubeClientRequestInfo::of_web_embedded_player_client();
    let embed_page = format!("https://www.youtube.com/embed/{video_id}");
    let mut payload = envelope_to_body(build_envelope(
        &client,
        localization,
        content_country,
        Some(&embed_page),
    ));

    let cpn = generate_content_playback_nonce();
    add_player_body_fields(&mut payload, video_id, &cpn);
    add_playback_context(&mut payload, signature_timestamp, &embed_page);
    match po_token {
        Some(token) => add_service_integrity_dimensions(&mut payload, token),
        None => {}
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}player{DISABLE_PRETTY_PRINT_PARAM}");
    post_youtube(&endpoint, &Value::Object(payload), youtube_post_headers())
}
/// Full /player request issued as the ANDROID client, sent to the gapis
/// endpoint with the mobile header set and the Android user agent.
///
/// The caller supplies `cpn` and `po_token` because they are paired with
/// the URLs the response will return; mixing them with iOS values returns
/// 403. `visitor_data`, when given, overrides the client info's visitor
/// data before the envelope is built.
pub fn get_android_player_response(
    video_id: &str,
    localization: &Localization,
    content_country: &ContentCountry,
    cpn: &str,
    po_token: Option<&str>,
    visitor_data: Option<&str>,
) -> Result<Value, ExtractionError> {
    let mut client = InnertubeClientRequestInfo::of_android_client();
    if let Some(visitor) = visitor_data {
        client.client_info.visitor_data = Some(visitor.into());
    }

    let mut payload =
        envelope_to_body(build_envelope(&client, localization, content_country, None));
    add_player_body_fields(&mut payload, video_id, cpn);
    if let Some(token) = po_token {
        add_service_integrity_dimensions(&mut payload, token);
    }

    // The `t` query parameter gets its own freshly generated nonce.
    let t_param = generate_content_playback_nonce();
    let endpoint = format!(
        "{YOUTUBEI_V1_GAPIS_URL}player{DISABLE_PRETTY_PRINT_PARAM}&t={t}&id={video_id}",
        t = t_param
    );
    let user_agent = android_user_agent(content_country);
    post_youtube(
        &endpoint,
        &Value::Object(payload),
        mobile_post_headers(&user_agent),
    )
}
/// ANDROID `/reel/reel_item_watch` request — the fallback used when no
/// poToken is available. The `$fields=playerResponse` filter limits the
/// reply to the `playerResponse`-shaped JSON wrapped inside the reel
/// response.
///
/// The video id and cpn are sent twice: nested under `playerRequest` and
/// as the regular top-level player fields.
pub fn get_android_reel_player_response(
    video_id: &str,
    localization: &Localization,
    content_country: &ContentCountry,
    cpn: &str,
) -> Result<Value, ExtractionError> {
    let client = InnertubeClientRequestInfo::of_android_client();
    let mut payload =
        envelope_to_body(build_envelope(&client, localization, content_country, None));

    let player_request = json!({
        "videoId": video_id,
        "cpn": cpn,
    });
    payload.insert("playerRequest".to_owned(), player_request);
    add_player_body_fields(&mut payload, video_id, cpn);

    // The `t` query parameter gets its own freshly generated nonce.
    let t_param = generate_content_playback_nonce();
    let endpoint = format!(
        "{YOUTUBEI_V1_GAPIS_URL}reel/reel_item_watch{DISABLE_PRETTY_PRINT_PARAM}&t={t}&id={video_id}&$fields=playerResponse",
        t = t_param
    );
    let user_agent = android_user_agent(content_country);
    post_youtube(
        &endpoint,
        &Value::Object(payload),
        mobile_post_headers(&user_agent),
    )
}
/// /player request issued as the IOS client, sent to the gapis endpoint
/// with the mobile header set and the iOS user agent.
///
/// WARNING: the iOS-progressive URLs returned here are subject to YT's
/// ~917 KiB server-side cap — DO NOT route playback through these as the
/// primary path. They're useful for HLS manifests on live streams. (See
/// workspace memory/2026-05-24-night2-straw-vc18-rollback.md for the cap
/// diagnostic.)
///
/// `cpn` and `po_token` come from the caller; `visitor_data`, when given,
/// overrides the client info's visitor data before the envelope is built.
pub fn get_ios_player_response(
    video_id: &str,
    localization: &Localization,
    content_country: &ContentCountry,
    cpn: &str,
    po_token: Option<&str>,
    visitor_data: Option<&str>,
) -> Result<Value, ExtractionError> {
    let mut client = InnertubeClientRequestInfo::of_ios_client();
    if let Some(visitor) = visitor_data {
        client.client_info.visitor_data = Some(visitor.into());
    }

    let mut payload =
        envelope_to_body(build_envelope(&client, localization, content_country, None));
    add_player_body_fields(&mut payload, video_id, cpn);
    if let Some(token) = po_token {
        add_service_integrity_dimensions(&mut payload, token);
    }

    // The `t` query parameter gets its own freshly generated nonce.
    let t_param = generate_content_playback_nonce();
    let endpoint = format!(
        "{YOUTUBEI_V1_GAPIS_URL}player{DISABLE_PRETTY_PRINT_PARAM}&t={t}&id={video_id}",
        t = t_param
    );
    let user_agent = ios_user_agent(content_country);
    post_youtube(
        &endpoint,
        &Value::Object(payload),
        mobile_post_headers(&user_agent),
    )
}
/// Serializes `body` to JSON, POSTs it to `url` with `headers` through the
/// globally registered downloader, and parses the response body as JSON.
///
/// # Errors
/// * `ExtractionError::DownloaderMissing` — no downloader is registered.
/// * `ExtractionError::Parsing` — the body could not be serialized, or the
///   response body is not valid JSON.
/// * `ExtractionError::Network` — the response status is anything other
///   than 200.
/// * Whatever the downloader's `execute` fails with, converted via `?`.
fn post_youtube(
    url: &str,
    body: &Value,
    headers: Vec<(String, String)>,
) -> Result<Value, ExtractionError> {
    let downloader = match NewPipe::downloader() {
        Some(downloader) => downloader,
        None => return Err(ExtractionError::DownloaderMissing),
    };

    let payload = serde_json::to_vec(body).map_err(|err| {
        ExtractionError::Parsing(ParsingError::Invalid(format!("serialize body: {err}")))
    })?;

    // Thread the builder through every header, then finish the request.
    let request = headers
        .into_iter()
        .fold(Request::post(url, payload), |builder, (name, value)| {
            builder.add_header(&name, &value)
        })
        .build();

    let response = downloader.execute(request)?;
    let status = response.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "HTTP {} from {url}",
            status
        ))));
    }

    serde_json::from_str(response.response_body())
        .map_err(|err| ExtractionError::Parsing(ParsingError::JsonShape(err.to_string())))
}
/// Error type returned by the /player request helpers in this module.
/// `NetworkError` and `ParsingError` convert into it automatically
/// (`#[from]`), so `?` works on both.
#[derive(Debug, thiserror::Error)]
pub enum ExtractionError {
/// Transport-level failure; also used by `post_youtube` for any response
/// whose status code is not 200.
#[error("network: {0}")]
Network(#[from] NetworkError),
/// The request body could not be serialized, or the response body could
/// not be parsed as JSON.
#[error("parsing: {0}")]
Parsing(#[from] ParsingError),
/// `NewPipe::downloader()` returned `None` — no downloader is registered.
#[error("downloader not initialized")]
DownloaderMissing,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The nonce is 16 characters long and uses only letters, digits,
    /// `-` and `_`.
    #[test]
    fn cpn_is_16_chars_alphanumeric() {
        let nonce = generate_content_playback_nonce();
        assert_eq!(nonce.len(), 16);

        let is_allowed = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';
        assert!(nonce.chars().all(is_allowed));
    }

    /// Two nonces generated a couple of milliseconds apart are different.
    #[test]
    fn two_consecutive_cpns_differ() {
        let first = generate_content_playback_nonce();
        // Give the clock-derived seed time to move between the two calls.
        let pause = std::time::Duration::from_millis(2);
        std::thread::sleep(pause);
        let second = generate_content_playback_nonce();
        assert_ne!(first, second);
    }
}