diff --git a/Cargo.toml b/Cargo.toml index f56ee19..ae8bbec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rustypipe" -version = "0.11.5" +version = "0.11.4" rust-version = "1.67.1" edition.workspace = true authors.workspace = true @@ -74,7 +74,7 @@ path_macro = "1.0.0" tracing-test = "0.2.5" # Included crates -rustypipe = { path = ".", version = "0.11.5", default-features = false } +rustypipe = { path = ".", version = "0.11.4", default-features = false } rustypipe-downloader = { path = "./downloader", version = "0.3.1", default-features = false, features = [ "indicatif", "audiotag", diff --git a/docs/PORTING_NPE_PIPELINE.md b/docs/PORTING_NPE_PIPELINE.md deleted file mode 100644 index f1311c6..0000000 --- a/docs/PORTING_NPE_PIPELINE.md +++ /dev/null @@ -1,123 +0,0 @@ -# Porting NPE's player-JS pipeline into rustypipe - -**Branch:** `kayos/m1-sig-port` -**Goal:** Replace `src/deobfuscate.rs`'s narrow regex approach with -NewPipeExtractor's full pipeline so the fork keeps working as YouTube -rotates its `player_ias.vflset/.../base.js`. - -## The diagnosis - -Upstream rustypipe 0.11.4 (June 2025) extracts the signature -deobfuscation function with six regex patterns aimed at the call site -(`var&&(var=SIGFN(decodeURIComponent(var)))`). On current YouTube player -`c2f7551f` (May 2026) all six miss. NewPipeExtractor master's six -patterns also miss on the same file — and NPE-master's nsig (throttling) -pipeline is openly broken (`TeamNewPipe/NewPipeExtractor#1339`, open -since 2026-02-03; the dev branch has had no sig/nsig commits in 60 -days). The reason NPE *appears* to work in apps is that the -Innertube paths for Android / iOS / TV clients return stream URLs that -don't carry an obfuscated `s=` signature for most videos — sig deobf -is a fallback the typical playback path never reaches. - -Two structural changes have happened since rustypipe was last cut: - -1. **The sig fn call site now sometimes takes a numeric prefix arg.** - New shape: `var&&(var=SIGFN(123,decodeURIComponent(var)))`. NPE's - regex set has one pattern for this; rustypipe doesn't. - -2. **YT routes literal token references through a global string array.** - Near the top of every recent `player.js`: - ```js - var e="startsWith{redirector.googlevideo.com{split{...{decodeURIComponent{...".split("{") - ``` - Calls then reference `e[N]` instead of the literal symbol. So an - anchor like `decodeURIComponent` is no longer present at the sig-fn - call site as text — it's `e[37]` (or whatever the index is). - -NPE's pipeline handles (1) but not (2). To make the fork robust we -do both. - -## What we're porting - -| NPE file | Rust target | Notes | -|---|---|---| -| `YoutubeSignatureUtils.java` | `src/deobfuscate.rs` (rewritten) | Sig fn name + body + helper-obj + global-var assembly | -| `YoutubeThrottlingParameterUtils.java` | new `src/deobfuscate/throttling.rs` module | nsig fn name + body + early-return fixup | -| `utils/jsextractor/JavaScriptExtractor.matchToClosingBrace` | new `src/deobfuscate/jslexer.rs` | Find a `name=function` site, walk braces until balanced | -| `YoutubeJavaScriptPlayerManager.java` | already covered by rustypipe's `cache.rs` | We keep rustypipe's cache shape but extend the cached payload to include nsig fn + global var | - -## Pipeline (the desired flow) - -``` -player.js (string) - │ - ├── extract_sig_fn_name // 6+ regex patterns, w/ globalVar[N] retry - │ │ - │ └── fall back to: // globalVar[N] indirection - │ 1. extract_global_string_array_indices() - │ 2. find N where arr[N] == "decodeURIComponent" - │ 3. re-run patterns with `(?:decodeURIComponent|globalVar\[N\])` - │ - ├── extract_sig_fn_body // lexer brace-walk, regex fallback - ├── extract_global_var // var X="...".split("{") (verbatim) - ├── extract_helper_obj_name // from inside fn body: [;,]NAME[.. - ├── extract_helper_obj_body // var NAME={...}; - └── assemble: - globalVar + ";" + helperObj + ";" + deobfFn + ";" + callerFn - ── eval in rquickjs ──→ deobf_sig(input) ⇒ deobf(input) - -player.js (string) - │ - ├── extract_nsig_fn_name // 7 NPE patterns including arr-index variants - │ │ - │ └── if array variant: resolve var NAME=[fn1,fn2,fnN] - │ - ├── extract_nsig_fn_body // lexer brace-walk - ├── fixup_early_return // strip `if(typeof X==="undefined")return arg;` - └── eval in rquickjs ──→ deobf_nsig(input) ⇒ deobf(input) -``` - -## Milestones - -| ID | Subject | Effort | Gate | -|---|---|---|---| -| M1.1 | Port `matchToClosingBrace` (clean brace walker) to `src/deobfuscate/jslexer.rs` | S | Standalone unit test against a tiny `var Wka=function(d){return /,/}/` fixture | -| M1.2 | Replace `get_sig_fn_name` with NPE's 6 patterns (including `(\d+,)decodeURIComponent`) | S | T-1 fixture is the prior-working `9216d1f7` player + new fixture `c2f7551f.js` | -| M1.3 | Add `extract_global_string_array` returning `(var_name, Vec)` | S | unit test for the `var e="…".split("{")` shape | -| M1.4 | Add `extract_helper_obj_name` from fn body + `extract_helper_obj_body` | S | unit test against the `qB={w8:..,EC:..,Np:..}` style fixture | -| M1.5 | Assemble globalVar + helperObj + sigFn + caller; round-trip via rquickjs | M | the existing `t_deobfuscate_sig` test fixture passes via new code path | -| M1.6 | Add globalVar[N] indirection retry to sig fn name extraction | M | new test: a fixture where the call site uses `e[N]` instead of `decodeURIComponent` | -| M1.7 | Port nsig pipeline (`YoutubeThrottlingParameterUtils`) — 7 patterns + array-resolution + early-return fixup | M | port + run NPE's `nsig_tests` table in `tests/sig_tests.rs` | -| M1.8 | Add live integration test downloading current `player.js` and asserting round-trip end-to-end | S | `cargo test --features live -- t_update` | -| M1.9 | Bump `Cargo.toml` to `0.12.0-sulkta.1`, tag, push to `Sulkta-Coop/rustypipe` `kayos/m1-sig-port` | S | clean release | - -## Not in M1 (parking lot) - -- Deno / external-JS-runtime swap (yt-dlp's path; we revisit if M1 - doesn't hold). -- Caching the assembled deobf code across processes (cookie-jar style - on Android). -- N-tier fallback against multiple geo `player.js` variants if YT ever - splits them. - -## Why this is safe-ish to ship - -NPE's pipeline is what straw v0.1.0-X currently relies on for the rare -videos that hit the sig path. Porting it 1:1 to Rust gives us a -behavioural baseline equivalent to what NPE provides — no regression -from the Java side. The globalVar[N] indirection added in M1.6 is the -forward-looking piece that handles current `c2f7551f`-style -obfuscation NPE doesn't yet handle. If M1.6 turns out unnecessary -(e.g. NPE-dev lands its own fix first), we can pull the patterns into -parity but keep our generalised resolution layer. - -## Tracking - -Workspace task IDs: -- `#226` parent — fork + ship the patched fork -- `#230` audit + port the sig pipeline (this milestone) -- `#231` build pipeline + crafting-table integration - -When M1 lands, U-2..U-5 revival becomes a `Cargo.toml` dep flip in -`rust/strawcore/` + cherry-pick of the parked commits -(`7ff5ac79e..a13896f5e` on `Sulkta-Coop/straw`). diff --git a/src/client/mod.rs b/src/client/mod.rs index 14c3ead..06386bc 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -100,17 +100,7 @@ impl ClientType { } fn needs_deobf(self) -> bool { - // As of YT iOS client v19.x and Android InnerTube v19.x (Dec 2024+), - // both paths return pre-signed stream URLs (no &s= cipher param, no - // &n= throttling param), so they don't need player.js deobfuscation. - // YT has experimented with serving cipher streams to iOS and n= params - // to Android in past A/B tests — if that returns, this matcher needs - // to be revisited along with the po_token strategy and a switchable - // fallback to a Tv/Desktop client. - // - // Skipping the deobf fetch here keeps the player path alive even when - // YouTube rotates player.js to a shape our extractor doesn't recognise. - !matches!(self, ClientType::Ios | ClientType::Android) + !matches!(self, ClientType::Ios) } fn needs_po_token(self) -> bool { @@ -1282,87 +1272,55 @@ impl RustyPipe { /// Get deobfuscation data (either from cache or extracted from YouTube's JavaScript code) async fn get_deobf_data(&self) -> Result { - // Cheap read-path first: avoid serialising concurrent player calls behind - // the write lock when the cache is already fresh (the common case after - // the first request). Only escalate to a write lock on cache miss. - // (Sulkta fork audit LOW-2.) - { - let read_guard = self.inner.cache.deobf.read().await; - if let Some(data) = read_guard.get() { - return Ok(data.clone()); - } - } - // Write lock here to prevent concurrent tasks from fetching the same data let mut deobf_data = self.inner.cache.deobf.write().await; - // Recheck under the write lock — another writer may have filled the cache - // between the read drop and the write acquire. - if let Some(data) = deobf_data.get() { - return Ok(data.clone()); - } + match deobf_data.get() { + Some(deobf_data) => Ok(deobf_data.clone()), + None => { + // Only attempt to fetch deobf data every 24 hours to avoid a flood of error reports + // if the client JS cannot be parsed + if deobf_data.should_retry() { + tracing::debug!("getting deobf data"); - // Only attempt to fetch deobf data every 24 hours to avoid a flood of error reports - // if the client JS cannot be parsed - if deobf_data.should_retry() { - tracing::debug!("getting deobf data"); - - match DeobfData::extract(&self.inner.http, self.inner.reporter.as_deref()).await - { - Ok(new_data) => { - // Sulkta fork (audit CRIT-2): soft-failed DeobfData (missing - // sig_fn or nsig_fn) is cached with a much shorter freshness - // window so the next player call retries extraction soon, in - // case YouTube has rotated player.js back to a shape we - // recognise. Otherwise a single rotation could poison the - // cache for the full 24h freshness window even if YT fixed - // things minutes later. - let mut entry = CacheEntry::from(new_data.clone()); - if !new_data.is_complete() { - // Force re-extraction on the next request after ~1h - // (so AddrLane workers and short-lived processes get a - // chance to recover without restarting), AND stamp - // failed_version so a library version bump also triggers - // re-extraction immediately. - entry.retry_later(1); - tracing::warn!( - "deobf data partial (has_sig={}, has_nsig={}); caching with 1h retry", - new_data.has_sig(), - new_data.has_nsig() - ); + match DeobfData::extract(&self.inner.http, self.inner.reporter.as_deref()).await + { + Ok(new_data) => { + // Write new data to the cache + *deobf_data = CacheEntry::from(new_data.clone()); + drop(deobf_data); + self.store_cache().await; + Ok(new_data) + } + Err(e) => { + // Try to fall back to expired cache data if available, otherwise return error + deobf_data.retry_later(24); + let res = match deobf_data.get_expired() { + Some(d) => { + tracing::warn!("could not get new deobf data ({e}), falling back to expired cache"); + Ok(d.clone()) + } + None => Err(e), + }; + drop(deobf_data); + self.store_cache().await; + res + } } - *deobf_data = entry; - drop(deobf_data); - self.store_cache().await; - Ok(new_data) - } - Err(e) => { - // Try to fall back to expired cache data if available, otherwise return error - deobf_data.retry_later(24); - let res = match deobf_data.get_expired() { + } else { + match deobf_data.get_expired() { Some(d) => { - tracing::warn!("could not get new deobf data ({e}), falling back to expired cache"); + tracing::warn!( + "could not get new deobf data, falling back to expired cache" + ); Ok(d.clone()) } - None => Err(e), - }; - drop(deobf_data); - self.store_cache().await; - res + None => Err(Error::Extraction(ExtractionError::Deobfuscation( + "could not get deobf data".into(), + ))), + } } } - } else { - match deobf_data.get_expired() { - Some(d) => { - tracing::warn!( - "could not get new deobf data, falling back to expired cache" - ); - Ok(d.clone()) - } - None => Err(Error::Extraction(ExtractionError::Deobfuscation( - "could not get deobf data".into(), - ))), - } } } diff --git a/src/client/player.rs b/src/client/player.rs index 927ab1d..9bae601 100644 --- a/src/client/player.rs +++ b/src/client/player.rs @@ -245,31 +245,8 @@ impl RustyPipeQuery { /// The order may change in the future in case YouTube applies changes to their /// platform that disable a client or make it less reliable. pub fn player_client_order(&self) -> &'static [ClientType] { - // iOS first — it skips player.js deobfuscation entirely (pre-signed - // stream URLs) AND doesn't require device attestation the way Android - // does. Tv is the secondary fallback (needs sig_timestamp in the - // request payload, but the soft-fail extraction keeps that piece alive - // even when sig_fn/nsig_fn extraction breaks). - // - // Android is intentionally NOT in the order: `needs_po_token` doesn't - // flag Android, so requests would fire unsigned and increasingly trip - // YouTube's "Sign in to confirm you're not a bot" — and that mapping - // becomes Unavailable{Captcha} which is not switchable. Re-add when - // a real po_token strategy for Android lands. - // - // Desktop is only consulted when botguard is wired (po_token signing - // available). For authenticated-via-cookie users on botguard - // sessions we put Desktop second so they don't walk through three - // wrong clients before reaching the one their cookie works on. if self.client.inner.botguard.is_some() { - if self.opts.auth == Some(true) { - // Authed-cookie users: prefer Desktop second (where the cookie - // actually maps to an OAuth session), Tv third (OAuth token), - // iOS first as a quick anonymous path. - &[ClientType::Ios, ClientType::Desktop, ClientType::Tv] - } else { - &[ClientType::Ios, ClientType::Tv, ClientType::Desktop] - } + &[ClientType::Desktop, ClientType::Ios, ClientType::Tv] } else { &[ClientType::Ios, ClientType::Tv] } @@ -725,19 +702,10 @@ impl<'a> StreamsMapper<'a> { None => match signature_cipher { Some(signature_cipher) => { self.cipher_to_url_params(signature_cipher).map_err(|e| { - // Audit follow-up to CRIT-1: keep the Deobfuscation - // error class through the wrapper so `switch_client` - // still trips when a cipher stream surfaces on a - // client whose deobf is unavailable. The previous - // `InvalidData(...)` wrapping silently demoted this - // to a non-switchable error and killed the whole - // player_from_clients chain on Mobile/Desktop sig paths. - ExtractionError::Deobfuscation( - format!( - "signatureCipher `{signature_cipher}`: {e}" - ) - .into(), - ) + ExtractionError::InvalidData( + format!("Could not deobfuscate signatureCipher `{signature_cipher}`: {e}") + .into(), + ) }) } None => Err(ExtractionError::InvalidData( diff --git a/src/deobfuscate.rs b/src/deobfuscate.rs index 703bc7b..d08a6e1 100644 --- a/src/deobfuscate.rs +++ b/src/deobfuscate.rs @@ -15,28 +15,6 @@ use crate::{ pub struct Deobfuscator { ctx: Context, - has_sig: bool, - has_nsig: bool, -} - -impl DeobfData { - /// True when both sig_fn and nsig_fn were extracted from the player.js. - /// Used by the cache layer to stamp partial extractions with a shorter - /// retry window than fully-good ones (see `get_deobf_data`). - pub fn is_complete(&self) -> bool { - !self.sig_fn.is_empty() && !self.nsig_fn.is_empty() - } - - /// True when the signature deobfuscation fn was extracted successfully. - pub fn has_sig(&self) -> bool { - !self.sig_fn.is_empty() - } - - /// True when the throttling-parameter (nsig) deobfuscation fn was - /// extracted successfully. - pub fn has_nsig(&self) -> bool { - !self.nsig_fn.is_empty() - } } #[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -50,10 +28,7 @@ pub struct DeobfData { impl DeobfData { /// Download and extract the latest deobfuscation data from YouTube /// - /// Creates a report if the data could not be extracted, including a - /// `Level::WARN` report on partial (sig_fn / nsig_fn) extraction failure - /// so reporter-based consumers (e.g. `FileReporter`) get an artefact to - /// debug new player.js shapes against. + /// Creates a report if the data could not be extracted pub async fn extract(http: &Client, reporter: Option<&dyn Reporter>) -> Result { let js_url = get_player_js_url(http).await?; let player_js = get_response(http, &js_url).await?; @@ -61,98 +36,35 @@ impl DeobfData { let res = Self::extract_fns(&js_url, &player_js); - match &res { - Err(e) => { - if let Some(reporter) = reporter { - let report = Report { - info: RustyPipeInfo::new(None, None), - level: Level::ERR, - operation: "extract_deobf", - error: Some(e.to_string()), - msgs: vec![], - deobf_data: None, - http_request: crate::report::HTTPRequest { - url: &js_url, - method: "GET", - req_header: None, - req_body: None, - status: 200, - resp_body: player_js, - }, - }; - reporter.report(&report); - } + if let Err(e) = &res { + if let Some(reporter) = reporter { + let report = Report { + info: RustyPipeInfo::new(None, None), + level: Level::ERR, + operation: "extract_deobf", + error: Some(e.to_string()), + msgs: vec![], + deobf_data: None, + http_request: crate::report::HTTPRequest { + url: &js_url, + method: "GET", + req_header: None, + req_body: None, + status: 200, + resp_body: player_js, + }, + }; + reporter.report(&report); } - Ok(data) if !data.is_complete() => { - // Soft-fail observability — without this, a sig/nsig extraction - // regression is invisible to reporter-based consumers and only - // shows up at `RUST_LOG=warn`. straw / torttube depend on the - // reporter for in-app crash dumps. - if let Some(reporter) = reporter { - let mut missing = Vec::with_capacity(2); - if !data.has_sig() { - missing.push("sig_fn"); - } - if !data.has_nsig() { - missing.push("nsig_fn"); - } - let report = Report { - info: RustyPipeInfo::new(None, None), - level: Level::WRN, - operation: "extract_deobf_soft_fail", - error: Some(format!( - "partial extraction; missing: {}", - missing.join(", ") - )), - msgs: vec![], - deobf_data: Some(data.clone()), - http_request: crate::report::HTTPRequest { - url: &js_url, - method: "GET", - req_header: None, - req_body: None, - status: 200, - resp_body: player_js, - }, - }; - reporter.report(&report); - } - } - Ok(_) => {} } res } pub fn extract_fns(js_url: &str, player_js: &str) -> Result { - // The signature timestamp is the only piece every "needs_deobf" client - // actually requires in its request payload — without it, those clients - // get an error back. So we hard-fail on sts extraction. + let sig_fn = get_sig_fn(player_js)?; + let nsig_fn = get_nsig_fn(player_js)?; let sts = get_sts(player_js)?; - // sig_fn and nsig_fn are needed only when YouTube returns stream URLs - // containing the &s= cipher / &n= throttling params. Most clients - // (iOS, Android, Tv) get pre-signed URLs and never touch these. - // Tolerate extraction failures here so a single rotated player.js - // shape doesn't bring down the whole player path for those clients. - // When a stream URL DOES carry &s= / &n=, `Deobfuscator::deobfuscate_sig` - // / `deobfuscate_nsig` short-circuit with a switchable error class - // (see `ExtractionError::switch_client` whitelist) so the client - // fallback loop tries the next client instead of killing the call. - let sig_fn = match get_sig_fn(player_js) { - Ok(f) => f, - Err(e) => { - tracing::warn!("could not extract sig deobf fn (sig deobfuscation disabled until YT rotates player.js again): {}", e); - String::new() - } - }; - let nsig_fn = match get_nsig_fn(player_js) { - Ok(f) => f, - Err(e) => { - tracing::warn!("could not extract nsig deobf fn (throttling parameter deobf disabled until YT rotates player.js again): {}", e); - String::new() - } - }; - Ok(Self { js_url: js_url.to_owned(), sig_fn, @@ -167,63 +79,19 @@ impl Deobfuscator { pub fn new(data: &DeobfData) -> Result { let rt = Runtime::new()?; let ctx = Context::full(&rt)?; - let has_sig = data.has_sig(); - let has_nsig = data.has_nsig(); - ctx.with(|ctx| -> Result<(), rquickjs::Error> { - // Skip JS eval for any deobf fn we couldn't extract. The matching - // `deobfuscate_sig` / `deobfuscate_nsig` calls below guard on - // `has_sig` / `has_nsig` and short-circuit with a clean - // `sig fn unavailable` error instead of falling into rquickjs - // and getting an opaque `FromJs { from: "undefined" ... }` — - // and that opaque shape used to land in `ExtractionError::Deobfuscation` - // which the upstream client-fallback loop treats as non-switchable. - if has_sig { - let mut opts = rquickjs::context::EvalOptions::default(); - opts.strict = false; - ctx.eval_with_options::<(), _>(data.sig_fn.as_bytes(), opts)?; - } - if has_nsig { - let mut opts = rquickjs::context::EvalOptions::default(); - opts.strict = false; - ctx.eval_with_options::<(), _>(data.nsig_fn.as_bytes(), opts)?; - } - Ok(()) + ctx.with(|ctx| { + let mut opts = rquickjs::context::EvalOptions::default(); + opts.strict = false; + ctx.eval_with_options::<(), _>(data.sig_fn.as_bytes(), opts)?; + let mut opts = rquickjs::context::EvalOptions::default(); + opts.strict = false; + ctx.eval_with_options::<(), _>(data.nsig_fn.as_bytes(), opts) })?; - Ok(Self { - ctx, - has_sig, - has_nsig, - }) - } - - /// True when the underlying DeobfData had a valid sig fn extracted. - /// Exposed for consumers that want to short-circuit cipher streams - /// without invoking `deobfuscate_sig` and observing the error. - #[allow(dead_code)] - pub fn has_sig(&self) -> bool { - self.has_sig - } - - /// True when the underlying DeobfData had a valid nsig fn extracted. - /// Exposed for consumers that want to short-circuit throttled URLs. - #[allow(dead_code)] - pub fn has_nsig(&self) -> bool { - self.has_nsig + Ok(Self { ctx }) } /// Deobfuscate the `s` parameter from the `signature_cipher` field pub fn deobfuscate_sig(&self, sig: &str) -> Result { - if !self.has_sig { - // Short-circuit with a recognisable error class. Goes through - // `From for ExtractionError` → `ExtractionError::Deobfuscation`, - // which is in `switch_client`'s whitelist as of the Sulkta fork — so - // `player_from_clients` will try the next client (typically iOS, - // which doesn't carry signature_cipher streams) rather than killing - // the whole call. - return Err(DeobfError::Other( - "sig fn unavailable (player.js rotation; deobf extraction soft-failed)".into(), - )); - } let res = self .ctx .with(|ctx| call_fn(&ctx, DEOBF_SIG_FUNC_NAME, sig))?; @@ -233,13 +101,6 @@ impl Deobfuscator { /// Deobfuscate the `n` stream URL parameter to circumvent throttling pub fn deobfuscate_nsig(&self, nsig: &str) -> Result { - if !self.has_nsig { - // Same short-circuit as deobfuscate_sig — switchable error class - // for the client-fallback loop instead of an opaque rquickjs panic. - return Err(DeobfError::Other( - "nsig fn unavailable (player.js rotation; throttle deobf soft-failed)".into(), - )); - } let res = self .ctx .with(|ctx| call_fn(&ctx, DEOBF_NSIG_FUNC_NAME, nsig))?; diff --git a/src/error.rs b/src/error.rs index 655c3de..81cd7fa 100644 --- a/src/error.rs +++ b/src/error.rs @@ -259,13 +259,6 @@ impl ExtractionError { .. } | ExtractionError::WrongResult(_) | ExtractionError::Botguard(_) - // Sulkta fork (CRIT-1): deobf failures are usually transient - // — YT rotated the player.js to a shape our regex doesn't - // recognise, or served a cipher stream to a client that doesn't - // have a working sig fn in cache. Switching to another client - // (iOS first, which doesn't need deobf at all) is the right - // recovery move rather than killing the whole call. - | ExtractionError::Deobfuscation(_) ) } diff --git a/tests/sulkta_smoke.rs b/tests/sulkta_smoke.rs deleted file mode 100644 index 228aafd..0000000 --- a/tests/sulkta_smoke.rs +++ /dev/null @@ -1,167 +0,0 @@ -//! Sulkta-fork smoke tests for the player pipeline. -//! -//! Verifies the patched default client order (`Ios, Tv` without botguard) plus -//! the soft-fail DeobfData::extract works against current YouTube player.js. -//! -//! Run with: `cargo test --test sulkta_smoke -- --nocapture` - -use std::path::PathBuf; - -use rstest::{fixture, rstest}; -use rustypipe::client::{ClientType, RustyPipe}; -use rustypipe::error::{Error, ExtractionError, UnavailabilityReason}; - -/// A stable, long-running, public-domain music video. Used by upstream -/// tests too (`n4tK7LYFxI0` = Spektrem - Shine, NCS). -const TEST_VIDEO_ID: &str = "n4tK7LYFxI0"; - -/// Build a `RustyPipe` with a per-process scratch storage dir. Avoids the -/// concurrent-write race with `tests/youtube.rs` that shares `rustypipe_cache.json` -/// in the repo root, which was tripping audit MED-3. -#[fixture] -fn rp() -> RustyPipe { - let scratch: PathBuf = std::env::temp_dir().join(format!( - "rustypipe-sulkta-smoke-{}-{}", - std::process::id(), - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0) - )); - std::fs::create_dir_all(&scratch) - .unwrap_or_else(|e| panic!("create scratch storage dir {scratch:?}: {e}")); - RustyPipe::builder() - .storage_dir(&scratch) - .build() - .unwrap_or_else(|e| panic!("build RustyPipe with scratch={scratch:?}: {e}")) -} - -/// Sanity: iOS path returns stream URLs and never touches the deobf code. -#[rstest] -#[tokio::test] -async fn ios_player_returns_streams(rp: RustyPipe) { - let pd = rp - .query() - .player_from_client(TEST_VIDEO_ID, ClientType::Ios) - .await - .expect("iOS player_from_client should succeed"); - - assert_eq!(pd.details.id, TEST_VIDEO_ID); - assert!( - !pd.video_streams.is_empty() || !pd.video_only_streams.is_empty(), - "expected at least one video stream" - ); - assert!( - !pd.audio_streams.is_empty(), - "expected at least one audio stream" - ); -} - -/// TV path exercises the `needs_deobf=true` branch: the sig_timestamp request -/// payload is required, but the soft-fail patch keeps the call alive even when -/// sig_fn/nsig_fn regex extraction fails on a rotated player.js. -/// -/// YouTube IP-bans some shared egress IPs (datacenters, LAN-routed servers) -/// for the TV client with "Sign in to confirm you're not a bot". That's -/// environmental — match it precisely on the `UnavailabilityReason` enum -/// instead of substring-matching the rendered error so a real regression -/// can't sneak past the catch arm. -#[rstest] -#[tokio::test] -async fn tv_player_returns_streams(rp: RustyPipe) { - match rp - .query() - .player_from_client(TEST_VIDEO_ID, ClientType::Tv) - .await - { - Ok(pd) => { - assert_eq!(pd.details.id, TEST_VIDEO_ID); - assert!( - !pd.video_streams.is_empty() || !pd.video_only_streams.is_empty(), - "TV path returned no video streams" - ); - // Symmetric with iOS / default-order tests so a regression that - // silently drops the audio adaptation set can't pass here. - assert!( - !pd.audio_streams.is_empty(), - "TV path returned no audio streams" - ); - } - Err(Error::Extraction(ExtractionError::Unavailable { - reason: UnavailabilityReason::IpBan, - .. - })) => { - eprintln!( - "TV path skipped: YT IpBan on this egress (expected on shared/datacenter IPs)" - ); - } - Err(e) => panic!("TV path failed for a non-environmental reason: {e}"), - } -} - -/// The patched default-client order should pick iOS as primary and return -/// playable streams in the absence of botguard signing. -#[rstest] -#[tokio::test] -async fn default_client_order_returns_streams(rp: RustyPipe) { - let order = rp.query().player_client_order(); - eprintln!("default client order (no botguard): {order:?}"); - assert_eq!( - order[0], - ClientType::Ios, - "iOS should be the no-botguard primary" - ); - - let pd = rp - .query() - .player(TEST_VIDEO_ID) - .await - .expect("default-clients player() should succeed"); - - assert_eq!(pd.details.id, TEST_VIDEO_ID); - assert!( - !pd.video_streams.is_empty() || !pd.video_only_streams.is_empty(), - "expected at least one video stream from the default-clients path" - ); - assert!( - !pd.audio_streams.is_empty(), - "expected at least one audio stream from the default-clients path" - ); - - // Probe one returned audio stream to confirm YT actually serves it. - // GET with Range 0-1023 + an iOS User-Agent because YT's googlevideo - // CDN tends to 403 HEAD requests and UA mismatches. - let stream_url = pd - .audio_streams - .first() - .expect("at least one audio stream") - .url - .clone(); - eprintln!( - "probing first audio URL: {}", - &stream_url[..stream_url.len().min(180)] - ); - let client = reqwest::Client::builder() - .user_agent( - "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1 like Mac OS X; en_US)", - ) - .build() - .unwrap(); - let resp = client - .get(&stream_url) - .header("Range", "bytes=0-1023") - .send() - .await - .expect("GET request to YT CDN should not error"); - let status = resp.status(); - let body_len = resp.bytes().await.map(|b| b.len()).unwrap_or(0); - eprintln!("response: {body_len} bytes, status {status}"); - assert!( - status.is_success() || status.is_redirection(), - "audio URL Range-GET returned non-OK status: {status} (body={body_len} bytes; URL may need visitor_data or po_token)" - ); - assert!( - body_len > 0, - "audio URL returned OK but zero bytes — likely a sig-required URL we couldn't deobf" - ); -}