Public-flip audit: scrub audit-ticket prefixes + LAN refs + tighten README

URLs → git.sulkta.com. Audit-ticket prefixes (SPEC §N, audit Track X, vc=N
audit-fix, FIX (audit ...), PORT DEVIATION) stripped from comments — technical
reasoning retained. Crafting-table LAN refs softened to 'Sulkta build host'.
README sheds marketing scaffolding + stale status tables.
This commit is contained in:
Cobb Hayes 2026-05-27 13:29:52 -07:00
parent 75bc7dc6bf
commit c8dfc8a34a
24 changed files with 82 additions and 108 deletions

View file

@ -8,9 +8,9 @@
// URL, walks `endpoint.browseEndpoint.browseId` to get the UC... id, and
// retries the browse call. Up to 3 redirect hops.
//
// Tab parsing (videos/shorts/live/playlists) is in audit Track D §5 —
// `tabs[].tabRenderer.endpoint.browseEndpoint.params` is the magic
// base64 needed to land on each tab.
// Tab parsing (videos/shorts/live/playlists): the magic base64 needed to
// land on each tab lives at
// `tabs[].tabRenderer.endpoint.browseEndpoint.params`.
use serde_json::Value;
@ -88,8 +88,8 @@ pub fn resolve_handle_to_channel_id(url_fragment: &str) -> Result<String, Extrac
}
/// Magic params for the channel "Videos" tab — opaque base64. Same constant
/// NPE uses (audit Track A §2.4). Sending it with the channel browseId
/// switches YT's response from the Home tab to the Videos tab.
/// NPE uses. Sending it with the channel browseId switches YT's response
/// from the Home tab to the Videos tab.
const CHANNEL_VIDEOS_TAB_PARAMS: &str = "EgZ2aWRlb3PyBgQKAjoA";
pub fn fetch_channel_browse(channel_id: &str) -> Result<ChannelInfo, ExtractionError> {

View file

@ -2,9 +2,9 @@
// InnertubeClientRequestInfo.java + the prepareJsonBuilder() flow in
// YoutubeParsingHelper.java:1494-1559.
//
// Wire-order matters (audit Track A §2.1) — `serde_json::json!` macro
// preserves insertion order in the resulting Map, but we use a sequence
// of `.insert()` calls into a `serde_json::Map` to be explicit.
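// Wire-order matters — `serde_json::json!` preserves insertion order in
// the resulting Map only when serde_json's `preserve_order` feature is
// enabled (the default Map is a sorted BTreeMap). We build the envelope
// with a sequence of `.insert()` calls into a `serde_json::Map` so the
// intended order is explicit in the code.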
use serde_json::{json, Map, Value};
@ -114,7 +114,7 @@ impl InnertubeClientRequestInfo {
}
/// Builds the InnerTube request envelope mirroring NPE prepareJsonBuilder.
/// Insertion order matches NPE's wire-order verbatim (audit Track A §2.1).
/// Insertion order matches NPE's wire-order verbatim.
pub fn build_envelope(
info: &InnertubeClientRequestInfo,
localization: &Localization,

View file

@ -30,8 +30,8 @@ pub const ANDROID_CLIENT_ID: &str = "3";
pub const ANDROID_CLIENT_NAME: &str = "ANDROID";
pub const ANDROID_CLIENT_VERSION: &str = "21.03.36";
// PARITY: NPE hard-codes androidSdkVersion=36 + osVersion=16 even though
// the User-Agent advertises Android 15. DroidGuard doesn't check the
// NPE hard-codes androidSdkVersion=36 + osVersion=16 even though the
// User-Agent advertises Android 15. DroidGuard doesn't check the
// InnerTube context so this mismatch is intentional and not a bug.
pub const ANDROID_SDK_VERSION: u32 = 36;
pub const ANDROID_OS_VERSION: &str = "16";

View file

@ -172,9 +172,7 @@ mod tests {
#[test]
fn table_has_57_entries() {
// Audit Track A §7 says "53" in prose but tallies the same 57
// entries below. NPE source ItagItem.java has 57 distinct itag
// IDs. Matches exactly.
// NPE source ItagItem.java has 57 distinct itag IDs. Matches exactly.
assert_eq!(ITAG_TABLE.len(), 57);
}

View file

@ -5,7 +5,7 @@
// 1. iframe_api regex (primary)
// 2. embed/<videoId> page — Jsoup script-tag walk + jsUrl regex fallback
//
// PARITY: we deliberately reproduce NPE's bug where `select("script")
// We deliberately reproduce NPE's bug where `select("script")
// .attr("name", "player/base")` *mutates* the script tags and iterates ALL
// of them. The intent was "find the script with name=player/base" but
// Jsoup's attr-setter doesn't filter. Our walk does the same — iterate
@ -77,7 +77,7 @@ fn extract_from_embed(downloader: &dyn Downloader, video_id: &str) -> Result<Str
.map_err(|e| DeobfError::FetchEmbed(e.to_string()))?;
let body = resp.response_body();
// PARITY: NPE iterates every <script> tag (the `.attr("name","player/base")`
// NPE iterates every <script> tag (the `.attr("name","player/base")`
// call sets an attribute rather than filtering). We do the same.
for caps in SCRIPT_TAG.captures_iter(body) {
if let Some(src) = caps.get(1) {

View file

@ -1,7 +1,7 @@
// Throttling-parameter (nsig / `n=` URL param) deobfuscation function
// extraction. Mirrors NPE services/youtube/YoutubeThrottlingParameterUtils.java.
//
// Flow per audit Track B §3:
// Flow:
// 1. Quick check: if URL doesn't contain `&n=` or `?n=`, return None.
// (60-900× perf win — load-bearing; NPE added this on 2025-07-10.)
// 2. Walk DEOBFUSCATION_FUNCTION_NAME_REGEXES — first match wins.
@ -41,9 +41,9 @@ pub fn throttling_parameter_from_url(url: &str) -> Option<String> {
}
/// Returns `(function_name, assembled_snippet)`. The snippet declares
/// the function as `var <name> = function(...) { ... };` (explicit `var`
/// is a PORT DEVIATION — NPE relies on Rhino's non-strict bare-assignment
/// behavior; QuickJS rejects it).
/// the function as `var <name> = function(...) { ... };` — the explicit
/// `var` differs from NPE, which relies on Rhino's non-strict
/// bare-assignment behavior. QuickJS rejects bare assignment.
pub fn build_deobfuscator(player_code: &str) -> Result<(String, String), DeobfError> {
let name = deobfuscation_function_name(player_code)?;
let body = deobfuscation_function_body(player_code, &name)?;

View file

@ -2,7 +2,7 @@
// Mirrors NPE services/youtube/YoutubeJavaScriptPlayerManager.java (the
// sole public class in the JS subsystem).
//
// Cache layout per audit Track B §5.3:
// Cache layout:
// * cached_player_code — process-lifetime, until clear_all_caches
// * cached_signature_timestamp
// * cached_sig_snippet — assembled JS, ready for runtime::run

View file

@ -3,7 +3,7 @@
// NPE's Rhino surface is 35 lines: compile_or_throw + run. We replicate
// the same shape on QuickJS via rquickjs.
//
// Mirroring decisions per audit Track B §4:
// Mirroring decisions:
// * One Runtime + Context per call. QuickJS contexts are cheap; this
// mirrors NPE's `Context.enter()` per call.
// * Context::full gives the ECMAScript built-ins (Array, String, Math)

View file

@ -1,7 +1,7 @@
// Signature (sig) deobfuscation function extraction.
// Mirrors NPE services/youtube/YoutubeSignatureUtils.java.
//
// Flow per audit Track B §2:
// Flow:
// 1. Walk FUNCTION_REGEXES — first match wins. Captures (a) function
// name (group 1) and optionally (b) additional-params prefix
// (group 2 on regex 0).
@ -11,7 +11,7 @@
// 5. Extract global string array.
// 6. Assemble: globalVar; helperObject; sigBody; function deobfuscate(a){return name(addlParams, a);}
//
// Also exposes the signature timestamp extraction (§2.7).
// Also exposes the signature timestamp extraction.
use once_cell::sync::Lazy;
use regex::Regex;
@ -29,18 +29,18 @@ pub const DEOBFUSCATION_FUNCTION_NAME: &str = "deobfuscate";
/// Group 2 = additional-params prefix on regex 0 (e.g. "43,"). For other
/// regexes group 2 (if present) is a backref or param name — NPE has a
/// latent bug where the groupCount>1 branch fires anyway, which we
/// faithfully reproduce per audit Track B §2.1.
/// faithfully reproduce.
static FUNCTION_REGEXES_SRC: &[&str] = &[
r#"\b(?:[a-zA-Z0-9_$]+)&&\((?:[a-zA-Z0-9_$]+)=([a-zA-Z0-9_$]{2,})\((\d+,)decodeURIComponent\((?:[a-zA-Z0-9_$]+)\)\)"#,
r#"\b(?:[a-zA-Z0-9_$]+)&&\((?:[a-zA-Z0-9_$]+)=([a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?:[a-zA-Z0-9_$]+)\)\)"#,
r#"\bm=([a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)"#,
r#"\bc&&\(c=([a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)"#,
r#"(?:\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*\{\s*a\s*=\s*a\.split\(\s*""\s*\)"#,
// PORT DEVIATION: NPE's 6th regex uses Java backref `\2` to match
// the same param name twice. Rust's `regex` crate doesn't support
// backrefs (linear-time NFA). Dropping it. Audit Track B §2.1 flags
// this same regex as having a latent groupCount bug — the loss is
// a fallback path that NPE itself half-broke.
// NPE's 6th regex uses Java backref `\2` to match the same param
// name twice. Rust's `regex` crate doesn't support backrefs
// (linear-time NFA). Dropped — NPE's same regex also has a latent
// groupCount bug, so what we lose is a fallback path NPE itself
// half-broke.
r#"([a-zA-Z0-9$]+)\s*=\s*function\([a-zA-Z0-9$]+\)\s*\{\s*[a-zA-Z0-9$]+\s*=\s*[a-zA-Z0-9$]+\.split\(""\)\s*;"#,
];
@ -48,16 +48,15 @@ static FUNCTION_REGEXES: Lazy<Vec<Regex>> = Lazy::new(|| {
FUNCTION_REGEXES_SRC.iter().map(|s| Regex::new(s).unwrap()).collect()
});
// PARITY: NPE's helper-object body regex uses Java atomic group `(?>...)`.
// NPE's helper-object body regex uses Java atomic group `(?>...)`.
// Rust's `regex` crate is backtracking-free already, so we drop the
// atomic marker. See audit Track B §2.3.
// atomic marker.
static SIG_DEOBF_HELPER_OBJ_NAME: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[;,]([A-Za-z0-9_$]{2,})\[..").unwrap());
static SIG_DEOBF_GLOBAL_ARRAY: Lazy<Regex> = Lazy::new(|| {
// `[A-z]` is the NPE-original loose ASCII range (covers A-Z, a-z, plus
// a handful of punctuation between). Audit Track B §2.4 calls this
// intentional. Kept verbatim.
// a handful of punctuation between). Intentional in NPE — kept verbatim.
Regex::new(r#"(var [A-z]=['"].*['"].split\("[;{]"\))"#).unwrap()
});
@ -74,10 +73,10 @@ pub fn deobfuscation_function_name_and_params(
if name.is_empty() {
continue;
}
// PARITY with NPE: if the regex has a group 2, treat it as a
// literal prefix even when it's actually a backref/param name.
// The resulting snippet would just fail to compile for those
// cases, falling through to the next attempt — same as NPE.
// If the regex has a group 2, treat it as a literal prefix
// even when it's actually a backref/param name — matches
// NPE. The resulting snippet would just fail to compile for
// those cases, falling through to the next attempt.
let extra = c.get(2).map(|m| m.as_str().to_string()).unwrap_or_default();
return Ok((name, extra));
}
@ -94,7 +93,7 @@ pub fn signature_timestamp(player_code: &str) -> Result<i32, DeobfError> {
}
/// Extracts the sig deobfuscation body. Tries lexer first, falls back to
/// the naive regex per NPE §2.2.
/// the naive regex.
pub fn deobfuscate_function_body(
player_code: &str,
function_name: &str,
@ -151,11 +150,11 @@ pub fn global_array(player_code: &str) -> Result<String, DeobfError> {
/// Assembles the final JS snippet — globalVar; helperObject; sigBody;
/// function deobfuscate(a) { return <name>(<extra>a); }
///
/// PORT DEVIATION from NPE: we prepend `var ` to the sig body so the
/// function name is an explicit global declaration. NPE relies on
/// Rhino's non-strict mode auto-creating globals from bare assignment
/// (`xyz=function(){}`). QuickJS treats undeclared-bare assignment as an
/// error. Functionally identical once the function is in scope.
/// We prepend `var ` to the sig body so the function name is an explicit
/// global declaration. NPE relies on Rhino's non-strict mode, which
/// auto-creates globals from bare assignment (`xyz=function(){}`); QuickJS
/// treats assignment to an undeclared name as an error. Functionally
/// identical once the function is in scope.
pub fn assemble_snippet(
global_var: &str,
helper_object: &str,

View file

@ -2,9 +2,9 @@
// resource categories. Mirrors NPE
// services/youtube/linkHandler/Youtube*LinkHandlerFactory.java.
//
// PORT SCOPE (per SPEC §6.6): we keep youtube.com / youtube-nocookie.com
// / youtu.be / m.youtube.com / music.youtube.com. The 27-host Invidious
// mirror list in NPE is dropped — Sulkta isn't an Invidious mirror.
// Accepted hosts: youtube.com / youtube-nocookie.com / youtu.be /
// m.youtube.com / music.youtube.com. The 27-host Invidious mirror list
// in NPE is dropped — strawcore isn't an Invidious mirror.
pub mod channel;
pub mod search;
@ -24,7 +24,7 @@ pub enum LinkError {
MalformedId(String),
}
/// The acceptable hosts for first-party YT links. Audit Track D §6.
/// The acceptable hosts for first-party YT links.
pub const ACCEPTED_HOSTS: &[&str] = &[
"youtube.com",
"www.youtube.com",

View file

@ -1,6 +1,6 @@
// YoutubeSearchQueryHandlerFactory + search filters. Filter params are
// opaque base64 protobufs — NPE doesn't decode them, just sends the
// magic strings. We mirror that. See audit Track D §3.
// magic strings. We mirror that.
//
// Music* filter variants were ported from NPE but never wired through
// (search_extractor rejected anything with `uses_music_endpoint()`).

View file

@ -55,7 +55,7 @@ pub fn youtube_post_headers() -> Vec<(String, String)> {
}
/// Mobile (Android/iOS) POST headers — UA + format-version only. No
/// X-YouTube-Client-Name, no Origin/Referer, no Cookie (audit Track A §6.2).
/// X-YouTube-Client-Name, no Origin/Referer, no Cookie.
pub fn mobile_post_headers(user_agent: &str) -> Vec<(String, String)> {
vec![
("Content-Type".into(), "application/json".into()),

View file

@ -7,7 +7,7 @@
// adapter that calls into PoTokenWebView (lifted verbatim from NewPipe
// app under GPL-3.0 compat).
//
// Two distinct token strings per call (audit Track E):
// Two distinct token strings per call:
// * player_request_po_token — goes into JSON body
// serviceIntegrityDimensions.poToken
// * streaming_data_po_token — goes into URL &pot=<...>
@ -17,11 +17,10 @@
// player request MUST send the same visitorData in context.client, or
// YT responds with HTTP 403 for the streaming URLs.
//
// FIX (audit Track E §2.2): NPE's Java API returns null both for
// "provider declined" and "provider errored." We split: Ok(None) =
// declined (no provider available for this client / video), Err = the
// provider tried and failed (the caller should still attempt extraction
// without po_token).
// NPE's Java API returns null both for "provider declined" and "provider
// errored." We split: Ok(None) = declined (no provider available for
// this client / video), Err = the provider tried and failed (the caller
// should still attempt extraction without po_token).
pub mod noop;

View file

@ -2,7 +2,7 @@
// services/youtube/extractors/YoutubeSearchExtractor.java.
//
// Calls /youtubei/v1/search with the WEB client (via desktop fast-path
// envelope). Body shape per audit Track D §3:
// envelope). Body shape:
// {
// "context": { "client": { ... } },
// "query": "<query>",

View file

@ -1,9 +1,9 @@
// YoutubeStreamExtractor — orchestrator. Mirrors NPE
// services/youtube/extractors/YoutubeStreamExtractor.java:onFetchPage().
//
// Order (per audit Track C §1.2):
// 1. Optional Android po_token from PoTokenProvider (Phase 5 wires this;
// until then we always go anonymous → reel endpoint).
// Order:
// 1. Optional Android po_token from PoTokenProvider (until a provider
// is registered we always go anonymous → reel endpoint).
// 2. Android `/player` (if po_token) or `/reel/reel_item_watch` (anon).
// checkPlayabilityStatus → typed ContentUnavailable variants.
// isPlayerResponseNotValid → reject the "you're a bot" decoy.
@ -12,7 +12,7 @@
// Exceptions swallowed → falls back to Android-response thumbnails.
// 5. WEB `/next` — description + related + chapters. Mandatory.
//
// Per-format URL post-processing (audit Track C §4.1):
// Per-format URL post-processing:
// * If format has `url` → use as-is (Android + iOS path).
// * Else parse `signatureCipher` → deobfuscate `s` → assemble
// `url&sp=<decoded>` (WEB path; not exercised in the current
@ -699,9 +699,8 @@ fn codec_from_mime(fmt: &Value) -> Option<String> {
Some(after[..end].to_string())
}
/// FIX (NPE deviation flagged in SPEC §5): dedup by itag id + delivery
/// method, NOT by `mediaFormat.id` — NPE's dedup collides itag 140 and
/// 141 because both are M4A.
/// Dedup by itag id + delivery method, NOT by `mediaFormat.id` — NPE's
/// dedup collides itag 140 and 141 because both are M4A.
fn push_audio_dedup(list: &mut Vec<AudioStream>, candidate: AudioStream) {
if list
.iter()

View file

@ -72,8 +72,8 @@ fn envelope_to_body(envelope: Value) -> Map<String, Value> {
}
}
/// WEB-client metadata-only /player call. Per audit Track A §4.4 — used
/// for microformat + thumbnails only; never used as a stream URL source.
/// WEB-client metadata-only /player call — used for microformat +
/// thumbnails only; never used as a stream URL source.
pub fn get_web_metadata_player_response(
video_id: &str,
localization: &Localization,