Port NewPipeExtractor's JS pipeline: player.js fetch + cache, sig and
nsig function extraction, deobfuscation, sticky-error caching.
src/youtube/js/
* runtime.rs — rquickjs wrapper (mirrors utils/JavaScript.java)
compile_or_throw + run(snippet, name, parameter)
* lexer.rs — match_to_closing_brace via the `ress` JS scanner
(NPE's lexer is derived from the same crate
upstream)
* extractor.rs — iframe_api → embed page fallback for player.js
URL, regex-driven hash extraction, clean-and-fetch
* signature.rs — 6 sig fn name regexes (front-most-recent),
deobf-function-body via lexer w/ regex fallback,
helper-object + global-string-array extraction,
signatureTimestamp, snippet assembler
* nsig.rs — 8 nsig fn name regexes (incl. array-indirection),
body via lexer w/ regex fallback, fixupFunction
early-return strip
* player_manager.rs — orchestrator + sticky-error cache mirroring
YoutubeJavaScriptPlayerManager
PORT DEVIATIONS from NPE (each flagged in code):
* replaced the 6th sig fn name regex (it used the Java backref \2, which
Rust's backtracking-free `regex` crate does not support) with a loose form
that NPE itself half-broke per audit Track B §2.1
* dropped the Java atomic group `(?>...)` from helper-object regex —
Rust's NFA is already linear-time
* nsig fixup substitutes `(?:"undefined"|'undefined')` for the
\1 backref; harmless loosening
* sig and nsig assembled snippets prepend `var` — QuickJS rejects
bare-assignment to undeclared identifiers; NPE relied on Rhino's
non-strict mode
Tests:
* 43 lib unit tests (up from 7 in Phase 1)
* 7 Phase 2 offline integration tests against a hand-crafted
minified synthetic player.js — exercises the full sig pipeline
(build_deobfuscator → runtime::run) and nsig fixup_function
* 7 Phase 1 live smoke tests still green
57/57 total green.
104 lines
4.9 KiB
Rust
104 lines
4.9 KiB
Rust
// Phase 2 offline smoke — exercises the full JS deobfuscator pipeline
|
|
// against a hand-crafted minified player.js. No network. The synthetic
|
|
// player.js below replicates the shape of real YT player.js:
|
|
// * a global string array (split on `;`)
|
|
// * a helper object (Pj) with reverse/swap/splice methods
|
|
// * a sig function (xyz) that calls helper methods via BRACKET access
|
|
// * a signatureTimestamp constant
|
|
// * a throttling-parameter function (nsig) with an early-return guard
|
|
// that fixup_function must strip
|
|
//
|
|
// Asserts:
|
|
// * sig pipeline produces a deterministic non-identity output
|
|
// * signatureTimestamp parses
|
|
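//   * nsig function name is extracted and fixup_function strips the
//     early-return guard
//   * the `n` throttling parameter is read from a URL when present and is
//     absent otherwise
//   * a sig function without a helper object yields SigHelperMissing
// NOTE(review): the url_with_throttling_parameter_deobfuscated round-trip
// (changes &n= and caches the result) is not exercised by the tests visible
// in this file.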
|
|
|
|
use strawcore::youtube::js::{signature, nsig, runtime, DeobfError};
|
|
|
|
// Synthetic minified player.js — replicates the shape of real YT player.js.
|
|
//
|
|
// Anchors each subsystem hits:
|
|
// * global array → `var Z="aa;bb;...".split(";")`
|
|
// * sig helper → `var Pj={rv:fn,sp:fn,sw:fn}` + bracket-access call
|
|
// * sig name regex 5 → `xyz=function(a){a=a.split("")...`
|
|
// * sig timestamp → `signatureTimestamp:20243`
|
|
// * nsig name regex 0 → `m85=function(p){...return Z[1]}` matches the
|
|
// classic-return-array-element shape; we tail the body with a
|
|
// `return Z[1]` so the regex anchors (but reverse() runs first so the
|
|
// actual output is determined by the reverse).
|
|
// nsig regex 0 is greedy `<name>=function.*return [A-Z]\[\d+\]`, so m85
|
|
// must appear in the source BEFORE xyz — otherwise the leftmost match
|
|
// starts at xyz and the greedy `.*` consumes through to m85's
|
|
// `return Z[1]`, miscapturing the name as "xyz". Real player.js naturally
|
|
// orders these the right way; mirror that here.
|
|
// Single-line fixture. Statements appear in this order: Z (string array built
// via split), Pj (helper object with rv/sp/sw), m85 (nsig-shaped function with
// a `typeof RUQ` early-return guard), xyz (sig function calling Pj via bracket
// access), foo (object holding signatureTimestamp). The m85-before-xyz
// ordering is deliberate.
const SYNTHETIC_PLAYER_JS: &str = r#"var Z="aa;bb;cc;dd;ee;ff".split(";");var Pj={rv:function(a){a.reverse();},sp:function(a,b){a.splice(0,b);},sw:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c;}};m85=function(p){var b=1;if(typeof RUQ==="undefined")return p;var a=p.split("");a.reverse();return Z[1];};xyz=function(a){a=a.split("");Pj["rv"](a);Pj["sw"](a,1);return a.join("");};var foo={signatureTimestamp:20243};"#;
|
|
|
|
#[test]
fn sig_pipeline_end_to_end() {
    // The fixture's sig function applies rv (reverse) and then sw with b = 1
    // (swap a[0] with a[1]):
    //   "abcdef" --reverse--> "fedcba" --swap [0] and [1]--> "efdcba"
    let deobfuscator = signature::build_deobfuscator(SYNTHETIC_PLAYER_JS).expect("build");
    let deobfuscated =
        runtime::run(&deobfuscator, signature::DEOBFUSCATION_FUNCTION_NAME, "abcdef").unwrap();
    assert_eq!(deobfuscated, "efdcba");
}
|
|
|
|
#[test]
fn sig_pipeline_is_deterministic() {
    // Build the deobfuscator snippet once and evaluate it twice with the same
    // input; both runs must produce the same output.
    let snippet = signature::build_deobfuscator(SYNTHETIC_PLAYER_JS).unwrap();
    let input = "hello!";
    let first = runtime::run(&snippet, signature::DEOBFUSCATION_FUNCTION_NAME, input).unwrap();
    let second = runtime::run(&snippet, signature::DEOBFUSCATION_FUNCTION_NAME, input).unwrap();
    assert_eq!(first, second);

    // Equality alone would also pass for a pipeline that merely echoes its
    // input. The fixture's sig function reverses the characters and then swaps
    // the first two ("hello!" -> "!olleh" -> "o!lleh"), so the output must
    // differ from the input.
    assert_ne!(first, input, "sig pipeline returned its input unchanged");
}
|
|
|
|
#[test]
fn signature_timestamp_extracted() {
    // The fixture embeds `signatureTimestamp:20243` inside the `foo` object.
    let extracted = signature::signature_timestamp(SYNTHETIC_PLAYER_JS).unwrap();
    assert_eq!(extracted, 20243);
}
|
|
|
|
#[test]
fn nsig_fixup_strips_early_return_and_runs() {
    let (name, body) = nsig::build_deobfuscator(SYNTHETIC_PLAYER_JS).unwrap();

    // The nsig name regex must pick up m85, not the sig function xyz.
    assert_eq!(name, "m85");

    // The `if(typeof RUQ==="undefined")return p;` guard must be gone.
    let guard_survived = body.contains("typeof RUQ");
    assert!(
        !guard_survived,
        "fixup_function should have stripped the early-return guard, got: {body}"
    );

    // The extracted body still ends in `return Z[1]` (that tail is what
    // anchors regex 0), and Z is not part of the extracted function, so
    // running the body on its own would throw. Only the function body is
    // executed here, not an assembled snippet, so define a Z global in front
    // of it. build_deobfuscator already added the `var` prefix to the body.
    let program = format!(r#"var Z=["aa","bb","cc","dd"];{body}"#);
    let result = runtime::run(&program, &name, "input!").unwrap();

    // With the guard stripped, m85 ignores its argument and returns Z[1].
    assert_eq!(result, "bb");
}
|
|
|
|
#[test]
fn nsig_returns_input_unchanged_without_n_param() {
    // The query string carries only `foo` and `bar`; there is no `n`
    // parameter to extract.
    let extracted =
        nsig::throttling_parameter_from_url("https://x.googlevideo.com/?foo=1&bar=baz");
    assert!(extracted.is_none());
}
|
|
|
|
#[test]
fn nsig_extracts_obfuscated_value_when_present() {
    // `n=ABC123` sits between two unrelated query parameters.
    let extracted =
        nsig::throttling_parameter_from_url("https://x.googlevideo.com/?foo=1&n=ABC123&bar=baz");
    assert_eq!(extracted.as_deref(), Some("ABC123"));
}
|
|
|
|
#[test]
fn missing_sig_helper_returns_err() {
    // `xyz` has the shape sig name regex 5 looks for
    // (`<name>=function(a){a=a.split("")...`), but its body never calls a
    // helper object, so the SIG_DEOBF_HELPER_OBJ_NAME lookup misses and
    // build_deobfuscator must report SigHelperMissing.
    let bad = r#"var Z="a".split(";");xyz=function(a){a=a.split("");return a.join("");};"#;
    let other = signature::build_deobfuscator(bad);
    assert!(
        matches!(other, Err(DeobfError::SigHelperMissing)),
        "expected SigHelperMissing, got {other:?}"
    );
}
|