vc=85: image caching + SB/RYD clients -> Rust + crash/autoplay fixes
All checks were successful
build-apk / build-and-publish (push) Successful in 7m18s
gitleaks / scan (push) Successful in 43s

- Thumbnails + channel icons stay cached: pin an explicit 256MB Coil disk
  cache + sized memory cache via SingletonImageLoader.Factory. Coil's
  default disk cap is 2% of the device's free space, so on a storage-tight
  phone the subs feed (most image-heavy screen) thrashed it and
  re-downloaded thumbnails on every visit.
- SponsorBlock + Return-YouTube-Dislike clients moved Kotlin -> Rust
  (strawcore net.rs: fetchSponsorSegments / fetchRydVotes). SponsorBlock
  keeps its privacy-preserving SHA-256 hash-prefix lookup. Kotlin is now a
  thin shim mapping the FFI records onto the SbSegment/RydVotes domain
  types; behavior identical. Migration #2 of "all backend -> Rust".
- Fix crash: extract_channel_id sliced the channel URL by a length derived
  from a lowercased copy of itself; to_lowercase() can change byte length
  on non-ASCII, so a non-ASCII URL tail could panic across the FFI and
  abort the app on a feed refresh. Now matches the prefix case-insensitively
  against the original with length + char-boundary guards.
- Fix autoplay hijack: advancing to the next video resolves over ~500ms; if
  you manually start a different video meanwhile, autoplay would replace
  your choice with the stale next-up. Added a staleness fence.

Verified: cargo check/test/clippy on the wrapper, full Android
compileDebugKotlin green, adversarial FFI pre-push audit passed.
This commit is contained in:
Cobb 2026-06-21 12:59:04 -07:00
parent addd074f61
commit 055c9c6d4f
10 changed files with 487 additions and 141 deletions

74
rust/Cargo.lock generated
View file

@ -224,6 +224,15 @@ version = "2.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bumpalo"
version = "3.20.3"
@ -373,6 +382,15 @@ version = "0.4.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789"
[[package]]
name = "cpufeatures"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
dependencies = [
"libc",
]
[[package]]
name = "crc32fast"
version = "1.5.0"
@ -382,6 +400,26 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crypto-common"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "displaydoc"
version = "0.2.6"
@ -542,6 +580,16 @@ dependencies = [
"slab",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.17"
@ -1425,6 +1473,17 @@ dependencies = [
"serde",
]
[[package]]
name = "sha2"
version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "shlex"
version = "1.3.0"
@ -1499,6 +1558,9 @@ dependencies = [
"quick-xml",
"reqwest",
"rquickjs-sys",
"serde",
"serde_json",
"sha2",
"strawcore-core",
"thiserror 1.0.69",
"tokio",
@ -1771,6 +1833,12 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "typenum"
version = "1.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
[[package]]
name = "unicase"
version = "2.9.0"
@ -1950,6 +2018,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "want"
version = "0.3.1"

View file

@ -44,6 +44,13 @@ android_logger = { version = "0.14", default-features = false }
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "gzip", "stream"] }
quick-xml = "0.36"
futures = "0.3"
# RYD + SponsorBlock JSON clients (net.rs). serde/serde_json are already in
# the dependency tree via strawcore-core; declaring them here lets the
# wrapper parse the two small enrichment endpoints directly. sha2 powers
# SponsorBlock's privacy-preserving SHA-256 hash-prefix lookup.
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sha2 = "0.10"
[build-dependencies]
uniffi = { version = "0.28", features = ["build"] }

View file

@ -149,44 +149,11 @@ async fn fetch_channel_rss(client: &Client, channel_url: &str) -> Option<Vec<Sea
.error_for_status()
.ok()?;
// Streaming body read with a hard byte cap — `.text()` reads
// unbounded into a String.
let body = read_capped_body(resp).await?;
// unbounded into a String. Shared with the RYD/SB path (net.rs).
let body = crate::net::read_capped_body(resp, RSS_MAX_BYTES).await?;
parse_rss(&body, channel_id)
}
/// Drain a reqwest Response into a String, bailing out (return None) if
/// the body exceeds RSS_MAX_BYTES.
async fn read_capped_body(resp: reqwest::Response) -> Option<String> {
use futures::StreamExt;
let mut total = 0usize;
let mut buf: Vec<u8> = Vec::with_capacity(32 * 1024);
let mut stream = resp.bytes_stream();
while let Some(chunk_result) = stream.next().await {
let chunk = chunk_result.ok()?;
// Defense-in-depth: a single hostile chunk can be arbitrarily
// large (HTTP allows multi-GiB chunks). Reject any one chunk
// bigger than the whole body cap before we even add it to the
// running total — protects against hyper having already
// allocated the chunk on our behalf.
if chunk.len() > RSS_MAX_BYTES {
log::warn!("strawcore::rss single chunk {} exceeds cap; aborting", chunk.len());
return None;
}
total = total.saturating_add(chunk.len());
if total > RSS_MAX_BYTES {
log::warn!("strawcore::rss body exceeded {RSS_MAX_BYTES} bytes; aborting");
return None;
}
buf.extend_from_slice(&chunk);
}
// Lossy decode — A strict from_utf8
// returns None on any invalid byte, so a single mojibake title
// would silently drop the entire channel from the feed. quick-xml
// tolerates U+FFFD replacement chars and the per-entry skip-on-
// empty handles broken entries downstream.
Some(String::from_utf8_lossy(&buf).into_owned())
}
/// Extract the `UCxxx` channel ID from a channel URL. Accepts the
/// shapes the Android app actually has in Subscriptions plus the ones
/// users paste from share intents:
@ -203,7 +170,6 @@ async fn read_capped_body(resp: reqwest::Response) -> Option<String> {
/// cache the ID into Subscriptions.
fn extract_channel_id(input: &str) -> Option<String> {
let trimmed = input.trim();
let trimmed_lower = trimmed.to_lowercase();
// Match the "<scheme>://<host>/channel/" prefix in a single sweep
// so we accept http/https + www./m. variants without four-way
// string-strip ladders. ANCHORED at the start of the string —
@ -220,11 +186,23 @@ fn extract_channel_id(input: &str) -> Option<String> {
"http://m.youtube.com/channel/",
];
for p in PREFIXES {
if let Some(rest) = trimmed_lower.strip_prefix(p) {
// Bytes match 1:1 with `trimmed` since the prefix is ASCII
// and case-folding ASCII doesn't change byte length.
let rest_in_original = &trimmed[p.len()..p.len() + rest.len()];
let id = rest_in_original
// Case-insensitive prefix match WITHOUT lowercasing the whole
// string first. The prior version did `trimmed.to_lowercase()`
// then sliced the *original* by the lowercased copy's length —
// but `to_lowercase()` can change byte length on non-ASCII input
// (e.g. the part after the prefix), so `p.len() + rest.len()`
// could run past the end of `trimmed` or land mid-UTF-8-char and
// PANIC. Since a panic here crosses the UniFFI boundary it aborts
// the whole app on a feed refresh of a channel with any non-ASCII
// in the URL tail. The prefixes are pure ASCII, so compare the
// first p.len() bytes case-insensitively against the ORIGINAL
// (guarded by length + char-boundary) and slice the original
// directly — no lowercase round-trip, no length mismatch.
if trimmed.len() >= p.len()
&& trimmed.is_char_boundary(p.len())
&& trimmed[..p.len()].eq_ignore_ascii_case(p)
{
let id = trimmed[p.len()..]
.split(|c: char| c == '/' || c == '?' || c == '#')
.next()?;
return validate_channel_id(id);

View file

@ -13,6 +13,7 @@ use std::sync::Once;
mod channel;
mod error;
mod feed;
mod net;
mod runtime;
mod search;
mod stream;
@ -20,6 +21,7 @@ mod stream;
// Re-exports so UniFFI sees the types at the crate root for macro discovery.
pub use channel::ChannelInfo;
pub use error::StrawcoreError;
pub use net::{RydVotes, SponsorSegment};
pub use search::{Page, SearchItem};
pub use stream::{AudioStreamItem, ResolvedStreams, StreamInfo, VideoStreamItem};

275
rust/strawcore/src/net.rs Normal file
View file

@ -0,0 +1,275 @@
// Small third-party HTTP/JSON clients that used to live in Kotlin
// (`net/RydClient.kt` + `net/SponsorBlockClient.kt`). Ported to Rust as
// part of the "all backend logic -> Rust" migration: these are network +
// parse, which belongs behind the FFI, not in the Android UI layer. Kotlin
// keeps thin shims that map these Records onto its existing domain types.
//
// Both endpoints are best-effort enrichment: any failure (transport,
// non-2xx, oversized body, malformed JSON) collapses to "no data"
// (None / empty Vec), exactly as the Kotlin originals did via runCatching.
// We never surface a StrawcoreError here — a dead RYD/SB host must not
// break video playback.
use std::sync::OnceLock;
use std::time::Duration;
use reqwest::Client;
use serde::Deserialize;
use sha2::{Digest, Sha256};
/// Matches the UA the Kotlin clients sent (some of these public APIs
/// rate-limit or shape responses by UA). Kept byte-identical to the old
/// `STRAW_USER_AGENT` in `net/Http.kt`.
///
/// The trailing `\` inside the literal is a string continuation: the line
/// break and any leading whitespace on the next line are not part of the
/// value, so the header is a single line.
const STRAW_USER_AGENT: &str =
"Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) \
Chrome/124.0.0.0 Mobile Safari/537.36 Straw/1.0";
/// Body caps carried over 1:1 from `net/Http.kt` (RYD_MAX_BYTES /
/// SB_MAX_BYTES). A hostile or compromised host must not be able to
/// stream a GB-scale body into memory.
///
/// Cap for a Return-YouTube-Dislike response body: 256 KiB.
const RYD_MAX_BYTES: usize = 256 * 1024;
/// Cap for a SponsorBlock response body: 1 MiB.
const SB_MAX_BYTES: usize = 1024 * 1024;
/// Return-YouTube-Dislike votes endpoint; the video id is appended as the
/// `videoId` query parameter by `fetch_ryd_votes`.
const RYD_VOTES_URL: &str = "https://returnyoutubedislikeapi.com/votes";
/// SponsorBlock skip-segments endpoint base; the 4-hex-char hash prefix is
/// appended directly after the trailing slash.
const SB_SKIP_BASE: &str = "https://sponsor.ajay.app/api/skipSegments/";
/// Process-wide reqwest client used by both enrichment lookups (RYD and
/// SponsorBlock). They are low-volume, and the Kotlin code they replace
/// shared a single OkHttp client as well, so one connection pool is enough.
static CLIENT: OnceLock<Client> = OnceLock::new();

/// Returns the shared client, building it on first use.
///
/// Configuration is modelled on the old OkHttp setup (`connectTimeout 15s`,
/// `readTimeout 30s`). Note that reqwest's `.timeout(..)` is a deadline for
/// the whole request (connect through end of body) rather than a per-read
/// timeout, so the 30 s here is the stricter of the two interpretations.
///
/// Returns `None` if the client cannot be built; nothing is cached in that
/// case, so a later call tries again. If two callers race on the first use,
/// both may build a client, but only one is stored and handed out.
fn client() -> Option<&'static Client> {
    CLIENT.get().or_else(|| {
        let fresh = Client::builder()
            .connect_timeout(Duration::from_secs(15))
            .timeout(Duration::from_secs(30))
            .user_agent(STRAW_USER_AGENT)
            .redirect(reqwest::redirect::Policy::limited(3))
            .build()
            .ok()?;
        Some(CLIENT.get_or_init(move || fresh))
    })
}
/// Reads an entire reqwest response body into a `String`, giving up with
/// `None` as soon as more than `cap` bytes have arrived. Also used by the
/// RSS feed path (`feed.rs`).
///
/// Two limits are enforced per chunk, in this order:
/// 1. the chunk on its own must not exceed `cap` (HTTP permits very large
///    chunks, and the transport may already have allocated one by the time
///    it is handed to us);
/// 2. the running total, including this chunk, must not exceed `cap`.
///
/// A transport error while streaming also yields `None`.
pub(crate) async fn read_capped_body(resp: reqwest::Response, cap: usize) -> Option<String> {
    use futures::StreamExt;

    let mut received = 0usize;
    let mut bytes: Vec<u8> = Vec::with_capacity(32 * 1024);
    let mut body = resp.bytes_stream();

    while let Some(next) = body.next().await {
        let piece = next.ok()?;
        let piece_len = piece.len();

        if piece_len > cap {
            log::warn!("strawcore::net single chunk {} exceeds cap; aborting", piece_len);
            return None;
        }

        received = received.saturating_add(piece_len);
        if received > cap {
            log::warn!("strawcore::net body exceeded {cap} bytes; aborting");
            return None;
        }

        bytes.extend_from_slice(&piece);
    }

    // Decode lossily on purpose: strict UTF-8 validation would throw away
    // the whole response over one bad byte, whereas a U+FFFD replacement
    // inside a JSON string value still parses.
    let text = String::from_utf8_lossy(&bytes).into_owned();
    Some(text)
}
// ---------------------------------------------------------------------------
// Return YouTube Dislike
// ---------------------------------------------------------------------------
/// Vote counts from the Return-YouTube-Dislike API. Kotlin maps this onto
/// its own `net.RydVotes` data class (the detail-screen overlay model).
///
/// Exposed as a UniFFI record; `fetch_ryd_votes` fills it field-for-field
/// from the decoded JSON response.
#[derive(Debug, Clone, uniffi::Record)]
pub struct RydVotes {
/// The `id` field of the response (presumably the video id — confirm
/// against the API).
pub id: String,
/// The response's `likes` value; 0 when the field is absent.
pub likes: i64,
/// The response's `dislikes` value; 0 when the field is absent.
pub dislikes: i64,
/// The response's `rating` value; 0.0 when the field is absent. The scale
/// is not visible from this file — TODO confirm against the API.
pub rating: f64,
/// The response's `viewCount` value; 0 when the field is absent.
pub view_count: i64,
}
/// JSON shape of a Return-YouTube-Dislike `/votes` response, as far as this
/// client reads it.
///
/// `id` is deliberately mandatory: the Kotlin original declared a
/// non-nullable `id: String`, so a response without it failed to parse and
/// the lookup returned null. The numeric fields fall back to zero when
/// absent, mirroring the defaults on the Kotlin data class.
#[derive(Deserialize)]
struct RydVotesWire {
    id: String,
    #[serde(default)]
    likes: i64,
    #[serde(default)]
    dislikes: i64,
    #[serde(default)]
    rating: f64,
    #[serde(default, rename = "viewCount")]
    view_count: i64,
}
/// GET https://returnyoutubedislikeapi.com/votes?videoId=<id>
/// Returns None on any failure (transport / non-2xx / oversize / bad JSON),
/// matching the Kotlin client's runCatching-to-null contract.
#[uniffi::export(async_runtime = "tokio")]
pub async fn fetch_ryd_votes(video_id: String) -> Option<RydVotes> {
log::info!("strawcore::ryd fetch id_len={}", video_id.len());
let client = client()?;
let resp = client
.get(RYD_VOTES_URL)
.query(&[("videoId", video_id.as_str())])
.header("Accept", "application/json")
.send()
.await
.ok()?;
if !resp.status().is_success() {
return None;
}
let body = read_capped_body(resp, RYD_MAX_BYTES).await?;
let wire: RydVotesWire = serde_json::from_str(&body)
.map_err(|e| log::warn!("strawcore::ryd json decode failed: {e}"))
.ok()?;
Some(RydVotes {
id: wire.id,
likes: wire.likes,
dislikes: wire.dislikes,
rating: wire.rating,
view_count: wire.view_count,
})
}
// ---------------------------------------------------------------------------
// SponsorBlock
// ---------------------------------------------------------------------------
/// One SponsorBlock segment. Kotlin maps this onto its `net.SbSegment`
/// (reconstructing the `[start, end]` list its serializer expects).
///
/// Exposed as a UniFFI record; built by `fetch_sponsor_segments_inner`
/// from one decoded wire segment.
#[derive(Debug, Clone, uniffi::Record)]
pub struct SponsorSegment {
/// The segment's `category` string, passed through unchanged.
pub category: String,
/// First element of the wire `segment` array, or 0.0 if it is missing.
pub start_sec: f64,
/// Second element of the wire `segment` array, or 0.0 if it is missing.
pub end_sec: f64,
/// The wire `UUID` value, if the response carried one.
pub uuid: Option<String>,
/// The wire `actionType` value, if the response carried one.
pub action_type: Option<String>,
}
/// One element of the JSON array returned by the SponsorBlock hash-prefix
/// endpoint: a video id plus the segments recorded for it.
#[derive(Deserialize)]
struct SbVideoWire {
// Required; used to pick out the requested video from the returned array.
#[serde(rename = "videoID")]
video_id: String,
// Missing `segments` decodes as an empty list rather than failing.
#[serde(default)]
segments: Vec<SbSegmentWire>,
}
/// One segment entry inside an `SbVideoWire`, as decoded from JSON.
#[derive(Deserialize)]
struct SbSegmentWire {
// Optional on the wire.
#[serde(rename = "UUID")]
uuid: Option<String>,
// Required; an entry without `category` fails the whole decode.
category: String,
// Read as `[start, end]` by the caller; missing field decodes as empty.
#[serde(default)]
segment: Vec<f64>,
// Optional on the wire.
#[serde(rename = "actionType")]
action_type: Option<String>,
}
/// Fetches SponsorBlock skip segments for one video through the
/// hash-prefix (k-anonymity) endpoint.
///
/// Only the first 4 hex characters of sha256(videoId) leave the device.
/// The server answers with segments for every video sharing that prefix,
/// and the exact match is selected locally, so the request itself never
/// carries the id of the video being watched.
///
/// GET https://sponsor.ajay.app/api/skipSegments/<prefix4>?categories=[...]
///
/// Any failure collapses to an empty `Vec`, matching the Kotlin contract.
#[uniffi::export(async_runtime = "tokio")]
pub async fn fetch_sponsor_segments(
    video_id: String,
    categories: Vec<String>,
) -> Vec<SponsorSegment> {
    // Lengths only: neither the id nor the category names are logged.
    log::info!(
        "strawcore::sb fetch id_len={} categories={}",
        video_id.len(),
        categories.len()
    );
    fetch_sponsor_segments_inner(&video_id, &categories)
        .await
        .unwrap_or_default()
}
/// Fallible core of `fetch_sponsor_segments`.
///
/// Returns `None` when the client cannot be built, the request fails, the
/// status is not 2xx, the body exceeds `SB_MAX_BYTES`, the JSON does not
/// decode, or the response contains no entry whose `videoID` equals
/// `video_id`. Otherwise returns that entry's segments (possibly empty).
async fn fetch_sponsor_segments_inner(
    video_id: &str,
    categories: &[String],
) -> Option<Vec<SponsorSegment>> {
    let http = client()?;
    let endpoint = format!("{SB_SKIP_BASE}{}", sha256_prefix4(video_id));

    // The SB API takes the category filter as a JSON array in the query
    // string (`?categories=["sponsor","selfpromo"]`); `.query` handles the
    // percent-encoding of that value.
    let wanted = serde_json::to_string(categories).ok()?;

    let response = http
        .get(&endpoint)
        .query(&[("categories", wanted.as_str())])
        .header("Accept", "application/json")
        .send()
        .await
        .ok()?;
    if !response.status().is_success() {
        return None;
    }

    let text = read_capped_body(response, SB_MAX_BYTES).await?;
    let buckets = match serde_json::from_str::<Vec<SbVideoWire>>(&text) {
        Ok(parsed) => parsed,
        Err(e) => {
            log::warn!("strawcore::sb json decode failed: {e}");
            return None;
        }
    };

    // A prefix lookup returns every video in the bucket; keep only the one
    // that was asked for.
    let ours = buckets
        .into_iter()
        .find(|entry| entry.video_id == video_id)?;

    let mut segments = Vec::with_capacity(ours.segments.len());
    for wire in ours.segments {
        // Same 0.0 fallback the Kotlin code applied to segment[0] and
        // segment[1], so a malformed 1-element segment still yields a row.
        let start_sec = wire.segment.first().copied().unwrap_or(0.0);
        let end_sec = wire.segment.get(1).copied().unwrap_or(0.0);
        segments.push(SponsorSegment {
            category: wire.category,
            start_sec,
            end_sec,
            uuid: wire.uuid,
            action_type: wire.action_type,
        });
    }
    Some(segments)
}
/// Returns the first two bytes of sha256(`input`) rendered as 4 lowercase
/// hex characters. Equivalent to Kotlin's
/// `sha256Hex(videoId).substring(0, 4)`.
fn sha256_prefix4(input: &str) -> String {
    let hash = Sha256::digest(input.as_bytes());
    let (first, second) = (hash[0], hash[1]);
    format!("{first:02x}{second:02x}")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the hash-prefix helper against known digests.
    #[test]
    fn prefix_matches_known_sha256() {
        let cases = [
            // sha256("dQw4w9WgXcQ") starts 5f6b: the 4-char prefix
            // SponsorBlock would receive for that video id.
            ("dQw4w9WgXcQ", "5f6b"),
            // The well-known sha256 of the empty string starts e3b0.
            ("", "e3b0"),
        ];
        for (input, expected) in cases {
            assert_eq!(sha256_prefix4(input), expected);
        }
    }

    /// Mirrors the `unwrap_or(0.0)` guard used for a malformed 1-element
    /// segment.
    // NOTE(review): this exercises only std slice accessors on a local
    // Vec; it does not call the segment-mapping code itself.
    #[test]
    fn segment_fallback_on_short_array() {
        let short: Vec<f64> = vec![12.5];
        let start = short.first().copied().unwrap_or(0.0);
        let end = short.get(1).copied().unwrap_or(0.0);
        assert_eq!(start, 12.5);
        assert_eq!(end, 0.0);
    }
}