diff --git a/Cargo.toml b/Cargo.toml index ae8bbec..39cb949 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rustypipe" -version = "0.11.4" +version = "0.11.5" rust-version = "1.67.1" edition.workspace = true authors.workspace = true diff --git a/docs/PORTING_NPE_PIPELINE.md b/docs/PORTING_NPE_PIPELINE.md new file mode 100644 index 0000000..f1311c6 --- /dev/null +++ b/docs/PORTING_NPE_PIPELINE.md @@ -0,0 +1,123 @@ +# Porting NPE's player-JS pipeline into rustypipe + +**Branch:** `kayos/m1-sig-port` +**Goal:** Replace `src/deobfuscate.rs`'s narrow regex approach with +NewPipeExtractor's full pipeline so the fork keeps working as YouTube +rotates its `player_ias.vflset/.../base.js`. + +## The diagnosis + +Upstream rustypipe 0.11.4 (June 2025) extracts the signature +deobfuscation function with six regex patterns aimed at the call site +(`var&&(var=SIGFN(decodeURIComponent(var)))`). On current YouTube player +`c2f7551f` (May 2026) all six miss. NewPipeExtractor master's six +patterns also miss on the same file — and NPE-master's nsig (throttling) +pipeline is openly broken (`TeamNewPipe/NewPipeExtractor#1339`, open +since 2026-02-03; the dev branch has had no sig/nsig commits in 60 +days). The reason NPE *appears* to work in apps is that the +Innertube paths for Android / iOS / TV clients return stream URLs that +don't carry an obfuscated `s=` signature for most videos — sig deobf +is a fallback the typical playback path never reaches. + +Two structural changes have happened since rustypipe was last cut: + +1. **The sig fn call site now sometimes takes a numeric prefix arg.** + New shape: `var&&(var=SIGFN(123,decodeURIComponent(var)))`. NPE's + regex set has one pattern for this; rustypipe doesn't. + +2. 
**YT routes literal token references through a global string array.** + Near the top of every recent `player.js`: + ```js + var e="startsWith{redirector.googlevideo.com{split{...{decodeURIComponent{...".split("{") + ``` + Calls then reference `e[N]` instead of the literal symbol. So an + anchor like `decodeURIComponent` is no longer present at the sig-fn + call site as text — it's `e[37]` (or whatever the index is). + +NPE's pipeline handles (1) but not (2). To make the fork robust we +do both. + +## What we're porting + +| NPE file | Rust target | Notes | +|---|---|---| +| `YoutubeSignatureUtils.java` | `src/deobfuscate.rs` (rewritten) | Sig fn name + body + helper-obj + global-var assembly | +| `YoutubeThrottlingParameterUtils.java` | new `src/deobfuscate/throttling.rs` module | nsig fn name + body + early-return fixup | +| `utils/jsextractor/JavaScriptExtractor.matchToClosingBrace` | new `src/deobfuscate/jslexer.rs` | Find a `name=function` site, walk braces until balanced | +| `YoutubeJavaScriptPlayerManager.java` | already covered by rustypipe's `cache.rs` | We keep rustypipe's cache shape but extend the cached payload to include nsig fn + global var | + +## Pipeline (the desired flow) + +``` +player.js (string) + │ + ├── extract_sig_fn_name // 6+ regex patterns, w/ globalVar[N] retry + │ │ + │ └── fall back to: // globalVar[N] indirection + │ 1. extract_global_string_array_indices() + │ 2. find N where arr[N] == "decodeURIComponent" + │ 3. re-run patterns with `(?:decodeURIComponent|globalVar\[N\])` + │ + ├── extract_sig_fn_body // lexer brace-walk, regex fallback + ├── extract_global_var // var X="...".split("{") (verbatim) + ├── extract_helper_obj_name // from inside fn body: [;,]NAME[.. 
+ ├── extract_helper_obj_body // var NAME={...}; + └── assemble: + globalVar + ";" + helperObj + ";" + deobfFn + ";" + callerFn + ── eval in rquickjs ──→ deobf_sig(input) ⇒ deobf(input) + +player.js (string) + │ + ├── extract_nsig_fn_name // 7 NPE patterns including arr-index variants + │ │ + │ └── if array variant: resolve var NAME=[fn1,fn2,fnN] + │ + ├── extract_nsig_fn_body // lexer brace-walk + ├── fixup_early_return // strip `if(typeof X==="undefined")return arg;` + └── eval in rquickjs ──→ deobf_nsig(input) ⇒ deobf(input) +``` + +## Milestones + +| ID | Subject | Effort | Gate | +|---|---|---|---| +| M1.1 | Port `matchToClosingBrace` (clean brace walker) to `src/deobfuscate/jslexer.rs` | S | Standalone unit test against a tiny `var Wka=function(d){return /,/}/` fixture | +| M1.2 | Replace `get_sig_fn_name` with NPE's 6 patterns (including `(\d+,)decodeURIComponent`) | S | T-1 fixture is the prior-working `9216d1f7` player + new fixture `c2f7551f.js` | +| M1.3 | Add `extract_global_string_array` returning `(var_name, Vec)` | S | unit test for the `var e="…".split("{")` shape | +| M1.4 | Add `extract_helper_obj_name` from fn body + `extract_helper_obj_body` | S | unit test against the `qB={w8:..,EC:..,Np:..}` style fixture | +| M1.5 | Assemble globalVar + helperObj + sigFn + caller; round-trip via rquickjs | M | the existing `t_deobfuscate_sig` test fixture passes via new code path | +| M1.6 | Add globalVar[N] indirection retry to sig fn name extraction | M | new test: a fixture where the call site uses `e[N]` instead of `decodeURIComponent` | +| M1.7 | Port nsig pipeline (`YoutubeThrottlingParameterUtils`) — 7 patterns + array-resolution + early-return fixup | M | port + run NPE's `nsig_tests` table in `tests/sig_tests.rs` | +| M1.8 | Add live integration test downloading current `player.js` and asserting round-trip end-to-end | S | `cargo test --features live -- t_update` | +| M1.9 | Bump `Cargo.toml` to `0.12.0-sulkta.1`, tag, push to `Sulkta-Coop/rustypipe` 
`kayos/m1-sig-port` | S | clean release | + +## Not in M1 (parking lot) + +- Deno / external-JS-runtime swap (yt-dlp's path; we revisit if M1 + doesn't hold). +- Caching the assembled deobf code across processes (cookie-jar style + on Android). +- N-tier fallback against multiple geo `player.js` variants if YT ever + splits them. + +## Why this is safe-ish to ship + +NPE's pipeline is what straw v0.1.0-X currently relies on for the rare +videos that hit the sig path. Porting it 1:1 to Rust gives us a +behavioural baseline equivalent to what NPE provides — no regression +from the Java side. The globalVar[N] indirection added in M1.6 is the +forward-looking piece that handles current `c2f7551f`-style +obfuscation NPE doesn't yet handle. If M1.6 turns out unnecessary +(e.g. NPE-dev lands its own fix first), we can pull the patterns into +parity but keep our generalised resolution layer. + +## Tracking + +Workspace task IDs: +- `#226` parent — fork + ship the patched fork +- `#230` audit + port the sig pipeline (this milestone) +- `#231` build pipeline + crafting-table integration + +When M1 lands, U-2..U-5 revival becomes a `Cargo.toml` dep flip in +`rust/strawcore/` + cherry-pick of the parked commits +(`7ff5ac79e..a13896f5e` on `Sulkta-Coop/straw`). diff --git a/src/client/mod.rs b/src/client/mod.rs index 06386bc..f3304f0 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -100,7 +100,11 @@ impl ClientType { } fn needs_deobf(self) -> bool { - !matches!(self, ClientType::Ios) + // Android + iOS InnerTube paths return pre-signed stream URLs (no &s= cipher, + // no &n= throttling param), so they don't need player.js deobfuscation at all. + // Skipping the deobf fetch here keeps the player path alive even when YouTube + // rotates the player.js to a shape our extractor doesn't recognise. 
+ !matches!(self, ClientType::Ios | ClientType::Android) } fn needs_po_token(self) -> bool { diff --git a/src/client/player.rs b/src/client/player.rs index 9bae601..1c5567d 100644 --- a/src/client/player.rs +++ b/src/client/player.rs @@ -245,8 +245,14 @@ impl RustyPipeQuery { /// The order may change in the future in case YouTube applies changes to their /// platform that disable a client or make it less reliable. pub fn player_client_order(&self) -> &'static [ClientType] { + // iOS always goes first — it skips player.js deobfuscation entirely (pre-signed + // stream URLs) AND doesn't require device attestation the way the Android + // client does. Without botguard, Tv is the only fallback (it does need a + // sig_timestamp request param, but its responses are typically OK). With + // botguard/po_token signing wired we can satisfy YT's device attestation, so + // Android is tried second, then Tv, and Desktop moves to last place. if self.client.inner.botguard.is_some() { - &[ClientType::Desktop, ClientType::Ios, ClientType::Tv] + &[ClientType::Ios, ClientType::Android, ClientType::Tv, ClientType::Desktop] } else { &[ClientType::Ios, ClientType::Tv] } diff --git a/src/deobfuscate.rs b/src/deobfuscate.rs index d08a6e1..54b5201 100644 --- a/src/deobfuscate.rs +++ b/src/deobfuscate.rs @@ -61,10 +61,34 @@ impl DeobfData { } pub fn extract_fns(js_url: &str, player_js: &str) -> Result { - let sig_fn = get_sig_fn(player_js)?; - let nsig_fn = get_nsig_fn(player_js)?; + // The signature timestamp is the only piece every "needs_deobf" client + // actually requires in its request payload — without it, those clients + // get an error back. So we hard-fail on sts extraction. let sts = get_sts(player_js)?; + // sig_fn and nsig_fn are needed only when YouTube returns stream URLs + // containing the &s= cipher / &n= throttling params. Most clients + // (iOS, Android, Tv) get pre-signed URLs and never touch these. 
+ // Tolerate extraction failures here so a single rotated player.js + // shape doesn't bring down the whole player path for those clients. + // An empty string marks a function as unavailable. If a stream URL + // DOES need deobfuscation, the matching `Deobfuscator` call is expected + // to return an error for that stream instead of failing the player here. + let sig_fn = match get_sig_fn(player_js) { + Ok(f) => f, + Err(e) => { + tracing::warn!("could not extract sig deobf fn (sig deobfuscation disabled until YT rotates player.js again): {}", e); + String::new() + } + }; + let nsig_fn = match get_nsig_fn(player_js) { + Ok(f) => f, + Err(e) => { + tracing::warn!("could not extract nsig deobf fn (throttling parameter deobf disabled until YT rotates player.js again): {}", e); + String::new() + } + }; + Ok(Self { js_url: js_url.to_owned(), sig_fn, @@ -79,13 +103,23 @@ impl Deobfuscator { pub fn new(data: &DeobfData) -> Result { let rt = Runtime::new()?; let ctx = Context::full(&rt)?; - ctx.with(|ctx| { - let mut opts = rquickjs::context::EvalOptions::default(); - opts.strict = false; - ctx.eval_with_options::<(), _>(data.sig_fn.as_bytes(), opts)?; - let mut opts = rquickjs::context::EvalOptions::default(); - opts.strict = false; - ctx.eval_with_options::<(), _>(data.nsig_fn.as_bytes(), opts) + ctx.with(|ctx| -> Result<(), rquickjs::Error> { + // Skip JS eval for any deobf fn we couldn't extract. The matching + // `deobfuscate_sig` / `deobfuscate_nsig` calls will then return an + // Err naturally because the global won't be defined — and that + // only matters if a stream actually has obfuscated params, which + // shouldn't happen on the iOS/Android/Tv InnerTube paths. 
+ if !data.sig_fn.is_empty() { + let mut opts = rquickjs::context::EvalOptions::default(); + opts.strict = false; + ctx.eval_with_options::<(), _>(data.sig_fn.as_bytes(), opts)?; + } + if !data.nsig_fn.is_empty() { + let mut opts = rquickjs::context::EvalOptions::default(); + opts.strict = false; + ctx.eval_with_options::<(), _>(data.nsig_fn.as_bytes(), opts)?; + } + Ok(()) })?; Ok(Self { ctx }) } diff --git a/tests/sulkta_smoke.rs b/tests/sulkta_smoke.rs new file mode 100644 index 0000000..a8ef06d --- /dev/null +++ b/tests/sulkta_smoke.rs @@ -0,0 +1,142 @@ +//! Sulkta-fork smoke tests for the player pipeline. +//! +//! Verifies the patched default client order (`Ios, Tv` without botguard) plus +//! the soft-fail DeobfData::extract works against current YouTube player.js. +//! +//! Run with: `cargo test --test sulkta_smoke -- --nocapture` + +use rstest::{fixture, rstest}; +use rustypipe::client::{ClientType, RustyPipe}; + +/// A stable, long-running, public-domain music video. Used by upstream +/// tests too (`n4tK7LYFxI0` = Spektrem - Shine, NCS). +const TEST_VIDEO_ID: &str = "n4tK7LYFxI0"; + +#[fixture] +fn rp() -> RustyPipe { + RustyPipe::builder() + .storage_dir(env!("CARGO_MANIFEST_DIR")) + .build() + .unwrap() +} + +/// Sanity: iOS path returns stream URLs and never touches the deobf code. 
+#[rstest] +#[tokio::test] +async fn ios_player_returns_streams(rp: RustyPipe) { + let pd = rp + .query() + .player_from_client(TEST_VIDEO_ID, ClientType::Ios) + .await + .expect("iOS player_from_client should succeed"); + + assert_eq!(pd.details.id, TEST_VIDEO_ID); + assert!( + !pd.video_streams.is_empty() || !pd.video_only_streams.is_empty(), + "expected at least one video stream" + ); + assert!( + !pd.audio_streams.is_empty(), + "expected at least one audio stream" + ); +} + +/// TV path exercises the `needs_deobf=true` branch: the sig_timestamp request +/// payload is required, but the soft-fail patch keeps the call alive even when +/// sig_fn/nsig_fn regex extraction fails on a rotated player.js. +/// +/// YouTube IP-bans some shared egress IPs (datacenters, LAN-routed servers) +/// for the TV client with "Sign in to confirm you're not a bot". That's +/// environmental, not a rustypipe regression, so we tolerate it here as long +/// as the error is recognisable. +#[rstest] +#[tokio::test] +async fn tv_player_returns_streams(rp: RustyPipe) { + match rp + .query() + .player_from_client(TEST_VIDEO_ID, ClientType::Tv) + .await + { + Ok(pd) => { + assert_eq!(pd.details.id, TEST_VIDEO_ID); + assert!( + !pd.video_streams.is_empty() || !pd.video_only_streams.is_empty(), + "TV path returned no video streams" + ); + } + Err(e) => { + let msg = format!("{e}"); + assert!( + msg.contains("Sign in") || msg.contains("IpBan") || msg.contains("bot"), + "TV path failed for a non-environmental reason: {msg}" + ); + eprintln!("TV path skipped: YT IP-banned this egress (expected on shared/datacenter IPs)"); + } + } +} + +/// The patched default-client order should pick iOS as primary and return +/// playable streams in the absence of botguard signing. 
+#[rstest] +#[tokio::test] +async fn default_client_order_returns_streams(rp: RustyPipe) { + let order = rp.query().player_client_order(); + eprintln!("default client order (no botguard): {order:?}"); + assert_eq!( + order[0], + ClientType::Ios, + "iOS should be the no-botguard primary" + ); + + let pd = rp + .query() + .player(TEST_VIDEO_ID) + .await + .expect("default-clients player() should succeed"); + + assert_eq!(pd.details.id, TEST_VIDEO_ID); + assert!( + !pd.video_streams.is_empty() || !pd.video_only_streams.is_empty(), + "expected at least one video stream from the default-clients path" + ); + assert!( + !pd.audio_streams.is_empty(), + "expected at least one audio stream from the default-clients path" + ); + + // Probe one returned audio stream to confirm YT actually serves it. + // GET with Range 0-1023 + an iOS User-Agent because YT's googlevideo + // CDN tends to 403 HEAD requests and UA mismatches. + let stream_url = pd + .audio_streams + .first() + .expect("at least one audio stream") + .url + .clone(); + eprintln!("probing first audio URL: {}", &stream_url[..stream_url.len().min(180)]); + let client = reqwest::Client::builder() + .user_agent( + "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1 like Mac OS X; en_US)", + ) + .build() + .unwrap(); + let resp = client + .get(&stream_url) + .header("Range", "bytes=0-1023") + .send() + .await + .expect("GET request to YT CDN should not error"); + let status = resp.status(); + let body_len = resp.bytes().await.map(|b| b.len()).unwrap_or(0); + eprintln!("response: {} bytes, status {}", body_len, status); + assert!( + status.is_success() || status.is_redirection(), + "audio URL Range-GET returned non-OK status: {} (body={} bytes; URL may need visitor_data or po_token)", + status, + body_len + ); + assert!( + body_len > 0, + "audio URL returned OK but zero bytes — likely a sig-required URL we couldn't deobf" + ); +}