diff --git a/Cargo.toml b/Cargo.toml index c220572..c161388 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2021" license = "GPL-3.0-or-later" authors = ["Sulkta-Coop"] -repository = "http://192.168.0.5:3001/Sulkta-Coop/strawcore" +repository = "https://git.sulkta.com/Sulkta-Coop/strawcore" description = "Rust port of NewPipeExtractor (YT-only). Plugs into Straw via UniFFI." [lib] diff --git a/README.md b/README.md index 6833d16..9c7b8ac 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,8 @@ # strawcore -Rust port of [NewPipeExtractor](https://github.com/TeamNewPipe/NewPipeExtractor) (v0.26.2), YouTube-only. Plugs into [Straw](http://192.168.0.5:3001/Sulkta-Coop/straw) via UniFFI. +Rust port of [NewPipeExtractor](https://github.com/TeamNewPipe/NewPipeExtractor) (v0.26.2), YouTube-only. Plugs into [Straw](https://git.sulkta.com/Sulkta-Coop/straw) via UniFFI. -## Why this exists - -`rustypipe` regex-parses YouTube's `player.js` and reimplements the signature deobfuscator in Rust. Every YT player rotation breaks it. NPE embeds Mozilla Rhino and executes the JS function live — resilient by design, and that's the architecture we're mirroring. - -The rustypipe-backed Straw build (vc=15..17) also routed playback through iOS-progressive URLs, which hit a server-side ~917 KiB end-byte cap. NPE uses the Android client + po_token → DASH manifest path, which doesn't see the cap. Same fix, different layer. - -See `memory/npe-audit-2026-05-24/SPEC.md` in the workspace repo for the full plan. 
- -## Status - -| Phase | Subsystem | Status | -|---|---|---| -| 1 | Foundation (downloader + service spine) | **in progress** | -| 2 | JS engine (rquickjs + ress) | pending | -| 3 | InnerTube + itag table | pending | -| 4 | Stream extractor + DASH | pending | -| 5 | PoTokenProvider trait + Android JNI bridge | pending | -| 6 | Search + Channel + Playlist + Kiosks | pending | -| 7 | UniFFI surface swap | pending | -| 8 | Delete rustypipe everywhere | pending | +`rustypipe` regex-parses YouTube's `player.js` and reimplements the signature deobfuscator in Rust — every YT player rotation breaks it. NPE embeds a JS engine and executes the function live, which survives rotations. strawcore mirrors that architecture on QuickJS via rquickjs. ## Build + test diff --git a/src/downloader/mod.rs b/src/downloader/mod.rs index c6e659d..b709902 100644 --- a/src/downloader/mod.rs +++ b/src/downloader/mod.rs @@ -1,7 +1,7 @@ // Downloader contract — mirrors NPE's Downloader abstract class. // -// Foundational invariants (SPEC §3, audited from NPE Downloader.java + -// OkHttpDownloaderImpl in the NewPipe-app): +// Foundational invariants (audited from NPE Downloader.java + the +// NewPipe-app OkHttpDownloaderImpl): // // * No automatic cookie jar. `Cookie:` header is hand-built per request. // * HTTP non-2xx is NOT an error. Only HTTP 429 throws diff --git a/src/downloader/request.rs b/src/downloader/request.rs index 024cb86..e88a523 100644 --- a/src/downloader/request.rs +++ b/src/downloader/request.rs @@ -1,6 +1,6 @@ // Request + RequestBuilder — mirrors NPE Request.java. // -// PARITY: add_header silently overwrites instead of appending, per NPE +// add_header silently overwrites instead of appending, per NPE // Request.java:215-221. Callers depend on this. append_header is our // own clean addition for callers we control. 
@@ -91,7 +91,7 @@ impl RequestBuilder { } } - /// PARITY with NPE Request.Builder.addHeader: silently overwrites any + /// Mirrors NPE Request.Builder.addHeader: silently overwrites any /// existing values for `name`. Callers downstream of NPE-derived code /// depend on this. For new code prefer [`Self::append_header`]. pub fn add_header(mut self, name: impl Into, value: impl Into) -> Self { diff --git a/src/downloader/response.rs b/src/downloader/response.rs index 4d00faf..cd34d32 100644 --- a/src/downloader/response.rs +++ b/src/downloader/response.rs @@ -1,8 +1,8 @@ // Response — mirrors NPE Response.java. // -// Header keys are lowercased (SPEC §3 invariant #3). latest_url tracks the -// final URL after redirect chasing — used by every linkHandler and the -// channel resolver loop. +// Header keys are lowercased. latest_url tracks the final URL after +// redirect chasing — used by every linkHandler and the channel resolver +// loop. use std::collections::BTreeMap; diff --git a/src/image.rs b/src/image.rs index 25e3444..7fb0dc0 100644 --- a/src/image.rs +++ b/src/image.rs @@ -1,8 +1,7 @@ // Image + ImageSet + ResolutionLevel. Mirrors NPE Image.java. // -// HEIGHT_UNKNOWN / WIDTH_UNKNOWN are -1 sentinels per SPEC §3 invariant #10 -// — kept as i32, not Option, because several JSON output sites encode -// this directly. +// HEIGHT_UNKNOWN / WIDTH_UNKNOWN are -1 sentinels — kept as i32, not +// Option, because several JSON output sites encode this directly. pub const HEIGHT_UNKNOWN: i32 = -1; pub const WIDTH_UNKNOWN: i32 = -1; diff --git a/src/localization.rs b/src/localization.rs index 822925f..21620dc 100644 --- a/src/localization.rs +++ b/src/localization.rs @@ -1,7 +1,7 @@ -// Localization + ContentCountry. Per SPEC §3 invariant #9, the DEFAULT -// Localization is ("en", "GB") — not en-US, not the system locale. -// NPE's Localization.java exposes ~100 country codes; we ship a small -// in-source set today and grow as needed. +// Localization + ContentCountry. 
The DEFAULT Localization is ("en", "GB") +// — not en-US, not the system locale. NPE's Localization.java exposes +// ~100 country codes; we ship a small in-source set today and grow as +// needed. use std::fmt; diff --git a/src/youtube/channel.rs b/src/youtube/channel.rs index a28e3ed..cb5df07 100644 --- a/src/youtube/channel.rs +++ b/src/youtube/channel.rs @@ -8,9 +8,9 @@ // URL, walks `endpoint.browseEndpoint.browseId` to get the UC... id, and // retries the browse call. Up to 3 redirect hops. // -// Tab parsing (videos/shorts/live/playlists) is in audit Track D §5 — -// `tabs[].tabRenderer.endpoint.browseEndpoint.params` is the magic -// base64 needed to land on each tab. +// Tab parsing (videos/shorts/live/playlists): the magic base64 needed to +// land on each tab lives at +// `tabs[].tabRenderer.endpoint.browseEndpoint.params`. use serde_json::Value; @@ -88,8 +88,8 @@ pub fn resolve_handle_to_channel_id(url_fragment: &str) -> Result Result { diff --git a/src/youtube/client_request.rs b/src/youtube/client_request.rs index 63f2547..55577f4 100644 --- a/src/youtube/client_request.rs +++ b/src/youtube/client_request.rs @@ -2,9 +2,9 @@ // InnertubeClientRequestInfo.java + the prepareJsonBuilder() flow in // YoutubeParsingHelper.java:1494-1559. // -// Wire-order matters (audit Track A §2.1) — `serde_json::json!` macro -// preserves insertion order in the resulting Map, but we use a sequence -// of `.insert()` calls into a `serde_json::Map` to be explicit. +// Wire-order matters — with serde_json's `preserve_order` feature both +// `json!` and `Map::insert` keep insertion order (the default Map is a +// sorted BTreeMap); we use explicit `.insert()` calls for clarity. use serde_json::{json, Map, Value}; @@ -114,7 +114,7 @@ impl InnertubeClientRequestInfo { } /// Builds the InnerTube request envelope mirroring NPE prepareJsonBuilder. -/// Insertion order matches NPE's wire-order verbatim (audit Track A §2.1). +/// Insertion order matches NPE's wire-order verbatim. 
pub fn build_envelope( info: &InnertubeClientRequestInfo, localization: &Localization, diff --git a/src/youtube/constants.rs b/src/youtube/constants.rs index 6673da8..b5e2ac4 100644 --- a/src/youtube/constants.rs +++ b/src/youtube/constants.rs @@ -30,8 +30,8 @@ pub const ANDROID_CLIENT_ID: &str = "3"; pub const ANDROID_CLIENT_NAME: &str = "ANDROID"; pub const ANDROID_CLIENT_VERSION: &str = "21.03.36"; -// PARITY: NPE hard-codes androidSdkVersion=36 + osVersion=16 even though -// the User-Agent advertises Android 15. DroidGuard doesn't check the +// NPE hard-codes androidSdkVersion=36 + osVersion=16 even though the +// User-Agent advertises Android 15. DroidGuard doesn't check the // InnerTube context so this mismatch is intentional and not a bug. pub const ANDROID_SDK_VERSION: u32 = 36; pub const ANDROID_OS_VERSION: &str = "16"; diff --git a/src/youtube/itag.rs b/src/youtube/itag.rs index a6f6195..025bdc3 100644 --- a/src/youtube/itag.rs +++ b/src/youtube/itag.rs @@ -172,9 +172,7 @@ mod tests { #[test] fn table_has_57_entries() { - // Audit Track A §7 says "53" in prose but tallies the same 57 - // entries below. NPE source ItagItem.java has 57 distinct itag - // IDs. Matches exactly. + // NPE source ItagItem.java has 57 distinct itag IDs. Matches exactly. assert_eq!(ITAG_TABLE.len(), 57); } diff --git a/src/youtube/js/extractor.rs b/src/youtube/js/extractor.rs index be27f38..3ec0883 100644 --- a/src/youtube/js/extractor.rs +++ b/src/youtube/js/extractor.rs @@ -5,7 +5,7 @@ // 1. iframe_api regex (primary) // 2. embed/ page — Jsoup script-tag walk + jsUrl regex fallback // -// PARITY: we deliberately reproduce NPE's bug where `select("script") +// We deliberately reproduce NPE's bug where `select("script") // .attr("name", "player/base")` *mutates* the script tags and iterates ALL // of them. The intent was "find the script with name=player/base" but // Jsoup's attr-setter doesn't filter. 
Our walk does the same — iterate @@ -77,7 +77,7 @@ fn extract_from_embed(downloader: &dyn Downloader, video_id: &str) -> Result tag (the `.attr("name","player/base")` + // NPE iterates every