// vc=56 — fast subscription feed via YouTube's per-channel RSS endpoint. // // YouTube serves `https://www.youtube.com/feeds/videos.xml?channel_id=UCxxx` // — small Atom XML, no auth, no JS, no InnerTube round-trip. Replaces the // per-channel `channel_info()` page-scrape that was costing ~500ms each // (the bottleneck behind NewPipe's "pull to refresh takes 30 seconds for // 50 subs" UX). Fan-out 50× concurrent via `futures::stream::buffer_unordered` // turns a 50-sub refresh from ~5-8s parallel-12 to ~1s parallel-50. // // RSS is intentionally lossy — it returns title/url/published/thumbnail // only. No duration, no view count, no shorts/age/paid flags. That's the // right trade for a feed-refresh use case: tap-through still goes through // the full stream_info path to fetch the rich metadata when actually // needed. use std::sync::OnceLock; use std::time::Duration; use futures::stream::{self, StreamExt}; use reqwest::Client; use crate::error::StrawcoreError; use crate::search::SearchItem; const RSS_BASE: &str = "https://www.youtube.com/feeds/videos.xml?channel_id="; const MAX_CONCURRENT: usize = 50; const PER_CHANNEL_TIMEOUT_S: u64 = 8; /// Cap on the body bytes we'll read for a single RSS fetch. Real YT /// Atom feeds are ~5-30 KB; 2 MiB leaves comfortable headroom while /// blocking a hostile or compromised host from streaming GB-scale /// bodies into JVM memory inside the 8s timeout. Round-67 audit /// rust-HIGH-5. const RSS_MAX_BYTES: usize = 2 * 1024 * 1024; /// Cap on parsed entries per channel — RSS normally returns 15. /// 50 leaves headroom for one-off legitimate variance; anything /// past that is a sign the feed isn't what we expect. /// Round-67 audit rust-MED-6. const RSS_MAX_ENTRIES: usize = 50; /// Year range we trust civil-to-days math for. Strawcore RSS only /// emits real-world recent uploads; clamping here turns adversarial /// year fields into a parse failure rather than i64 overflow. /// Round-67 audit rust-CRIT-1. 
const YEAR_MIN: i32 = 1970; const YEAR_MAX: i32 = 2200; /// Hybrid-backfill metadata: just the two fields RSS doesn't return /// (view count + duration). Kotlin calls this lazily for visible feed /// items after the RSS-fed paint to fill in the gaps that /// channel_feed_rss leaves empty. /// /// vc=66 — built specifically so the subs feed can show 'N views · /// X duration' the way YT does, without paying the full channel_info /// page-scrape cost on initial paint. The underlying stream_info IS /// heavier than we'd like (~500ms each, runs JS deobf for play URLs /// we'll discard) — future opt would be to parse the watch-page HTML /// JSON state directly for just these two fields. ~100ms savings per /// call but ~150 lines of HTML/JSON pluck logic. Punted until needed. #[derive(Debug, Clone, uniffi::Record)] pub struct EnrichedFeedMetadata { pub view_count: i64, pub duration_seconds: i64, } #[uniffi::export(async_runtime = "tokio")] pub async fn enrich_feed_item( video_url: String, ) -> Result { crate::runtime::ensure_initialized(); let info = crate::stream::stream_info(video_url).await?; Ok(EnrichedFeedMetadata { view_count: info.view_count, duration_seconds: info.duration_seconds, }) } /// Shared reqwest Client — DNS resolver + TLS keepalive + connection /// pool live here so a 50-channel fan-out reuses one pool instead of /// paying 50 handshakes. Round-67 audit rust-HIGH-4. static RSS_CLIENT: OnceLock = OnceLock::new(); fn rss_client() -> Result<&'static Client, StrawcoreError> { if let Some(c) = RSS_CLIENT.get() { return Ok(c); } let client = Client::builder() .timeout(Duration::from_secs(PER_CHANNEL_TIMEOUT_S)) .user_agent(concat!("Mozilla/5.0 (Android; Mobile; Straw/", env!("CARGO_PKG_VERSION"), ")")) // Cap redirect chains so a misconfigured/hostile feed can't // spin a server out of our 8s budget. Round-67 audit rust-LOW-8. 
.redirect(reqwest::redirect::Policy::limited(3)) .build() .map_err(|e| StrawcoreError::Extractor { msg: format!("http client build: {e}"), })?; Ok(RSS_CLIENT.get_or_init(|| client)) } /// Single-channel RSS — Kotlin keeps its per-channel cache + fan-out /// (parallelism cranked to 50 in the wrapper). Each call is ~50-150ms /// instead of the ~500ms channelInfo page-scrape, so a 50-sub refresh /// drops from ~5-8s to ~1s. #[uniffi::export(async_runtime = "tokio")] pub async fn channel_feed_rss( channel_url: String, ) -> Result, StrawcoreError> { crate::runtime::ensure_initialized(); log::info!("strawcore::channel_feed_rss url_len={}", channel_url.len()); let client = rss_client()?; Ok(fetch_channel_rss(client, &channel_url).await.unwrap_or_default()) } /// Bulk subscription feed fan-out — for callers that want one round-trip /// to Rust. Currently unused by the Android app (it sticks with the /// per-channel cache), but exposed for future desktop / web variants /// or for a "warm everything" background prefetch. #[uniffi::export(async_runtime = "tokio")] pub async fn subscription_feed( channel_urls: Vec, ) -> Result, StrawcoreError> { crate::runtime::ensure_initialized(); log::info!("strawcore::subscription_feed channels={}", channel_urls.len()); if channel_urls.is_empty() { return Ok(Vec::new()); } let client = rss_client()?; let results: Vec> = stream::iter(channel_urls.into_iter()) .map(|url| async move { fetch_channel_rss(client, &url).await.unwrap_or_default() }) .buffer_unordered(MAX_CONCURRENT) .collect() .await; // Per-channel ordering is RSS-served-newest-first. Cross-channel // interleave is the caller's responsibility — Kotlin's mergeFromCache // sorts by parsed recency, which is the source of truth. Returning // the flat list as-is. (vc=66 prior code sorted lexicographically // on the relative-date STRING, which is wrong because "10 hours // ago" < "2 hours ago" in cmp order — round-67 audit rust-HIGH-6.) 
Ok(results.into_iter().flatten().collect()) } async fn fetch_channel_rss(client: &Client, channel_url: &str) -> Option> { let channel_id = extract_channel_id(channel_url)?; let url = format!("{RSS_BASE}{channel_id}"); let resp = client .get(&url) .send() .await .ok()? .error_for_status() .ok()?; // Streaming body read with a hard byte cap — `.text()` reads // unbounded into a String. Round-67 audit rust-HIGH-5. let body = read_capped_body(resp).await?; parse_rss(&body, channel_id) } /// Drain a reqwest Response into a String, bailing out (return None) if /// the body exceeds RSS_MAX_BYTES. Round-67 audit rust-HIGH-5. async fn read_capped_body(resp: reqwest::Response) -> Option { use futures::StreamExt; let mut total = 0usize; let mut buf: Vec = Vec::with_capacity(32 * 1024); let mut stream = resp.bytes_stream(); while let Some(chunk_result) = stream.next().await { let chunk = chunk_result.ok()?; total = total.saturating_add(chunk.len()); if total > RSS_MAX_BYTES { log::warn!("strawcore::rss body exceeded {RSS_MAX_BYTES} bytes; aborting"); return None; } buf.extend_from_slice(&chunk); } String::from_utf8(buf).ok() } /// Extract the `UCxxx` channel ID from a channel URL. Accepts the /// shapes the Android app actually has in Subscriptions plus the ones /// users paste from share intents: /// * `https://www.youtube.com/channel/UCxxx...` /// * `https://youtube.com/channel/UCxxx...` /// * `http(s)://m.youtube.com/channel/UCxxx...` /// * trailing `/videos`, `?si=...`, etc — anything after the ID is dropped /// * raw `UCxxx...` (already an ID) /// /// Real YT channel IDs are EXACTLY 24 chars (`UC` + 22 base64-ish). /// Round-67 audit rust-HIGH-1. /// /// `@handle` URLs are NOT supported here — RSS requires the channel ID. /// Callers with @handles should resolve via channel_info() once and /// cache the ID into Subscriptions. 
fn extract_channel_id(input: &str) -> Option { let trimmed = input.trim(); let trimmed_lower = trimmed.to_lowercase(); // Match the ":///channel/" prefix in a single sweep // so we accept http/https + www./m. variants without four-way // string-strip ladders. const PREFIXES: &[&str] = &[ "https://www.youtube.com/channel/", "https://youtube.com/channel/", "https://m.youtube.com/channel/", "http://www.youtube.com/channel/", "http://youtube.com/channel/", "http://m.youtube.com/channel/", ]; for p in PREFIXES { if let Some(idx) = trimmed_lower.find(p) { let rest = &trimmed[idx + p.len()..]; let id = rest.split(|c: char| c == '/' || c == '?' || c == '#').next()?; return validate_channel_id(id); } } validate_channel_id(trimmed) } /// A real YouTube channel ID is `UC` followed by exactly 22 chars from /// `[A-Za-z0-9_-]`. Round-67 audit rust-HIGH-1. fn validate_channel_id(id: &str) -> Option { if id.len() != 24 || !id.starts_with("UC") { return None; } if !id.bytes().skip(2).all(|b| { matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_' | b'-') }) { return None; } Some(id.to_string()) } fn parse_rss(body: &str, channel_id: String) -> Option> { use quick_xml::events::Event; use quick_xml::Reader; let mut reader = Reader::from_str(body); reader.config_mut().trim_text(true); let mut buf = Vec::new(); let mut items: Vec = Vec::new(); // Per-entry scratch. let mut in_entry = false; let mut depth = 0u8; let mut video_id = String::new(); let mut title = String::new(); let mut uploader = String::new(); let mut uploader_url = String::new(); let mut thumbnail: Option = None; let mut published = String::new(); // What text-collecting state we're in. Replaced per element open. 
let mut text_target: Option = None; loop { match reader.read_event_into(&mut buf) { Ok(Event::Start(e)) => { let name = e.name(); let local = local_name(name.as_ref()); if local == "entry" { in_entry = true; depth = 0; video_id.clear(); title.clear(); uploader.clear(); uploader_url.clear(); thumbnail = None; published.clear(); } if !in_entry { continue; } depth = depth.saturating_add(1); text_target = match local { "videoId" => Some(TextTarget::VideoId), "title" if depth <= 2 => Some(TextTarget::Title), "name" => Some(TextTarget::UploaderName), "uri" => Some(TextTarget::UploaderUrl), "published" => Some(TextTarget::Published), _ => None, }; } Ok(Event::Empty(e)) => { if !in_entry { continue; } let name = e.name(); let local = local_name(name.as_ref()); // is self-closing. if local == "thumbnail" { for attr in e.attributes().flatten() { if attr.key.as_ref() == b"url" { if let Ok(v) = attr.unescape_value() { thumbnail = Some(v.into_owned()); } } } } } Ok(Event::Text(t)) => { if !in_entry { continue; } let Ok(s) = t.unescape() else { continue }; let s = s.as_ref(); match text_target { Some(TextTarget::VideoId) => video_id.push_str(s), Some(TextTarget::Title) => title.push_str(s), Some(TextTarget::UploaderName) => uploader.push_str(s), Some(TextTarget::UploaderUrl) => uploader_url.push_str(s), Some(TextTarget::Published) => published.push_str(s), None => {} } } Ok(Event::End(e)) => { if !in_entry { continue; } let name = e.name(); let local = local_name(name.as_ref()); if local == "entry" { // Skip entries missing the load-bearing fields — // an empty title renders as a blank card the user // can't tap, and an empty published collapses the // recency sort. Round-67 audit rust-HIGH-2. 
if !video_id.is_empty() && !title.is_empty() && !published.is_empty() { items.push(SearchItem { url: format!("https://www.youtube.com/watch?v={video_id}"), title: title.clone(), uploader: uploader.clone(), uploader_url: if uploader_url.is_empty() { Some(format!("https://www.youtube.com/channel/{channel_id}")) } else { Some(uploader_url.clone()) }, thumbnail: thumbnail.clone(), duration_seconds: 0, view_count: 0, // RSS gives RFC3339 timestamps. Convert to // the human-relative format Kotlin's // recencyScore parser expects ("N units // ago"). vc=56 was passing the raw ISO // through, which broke the sort comparator // — every item tied at MIN_VALUE so the // feed order was effectively random; LTT + // WTYP landed at top because they resolved // first in the fan-out. Caught 2026-05-26. upload_date_relative: iso_to_relative(&published), }); if items.len() >= RSS_MAX_ENTRIES { // Defense-in-depth against a feed that // ships thousands of blocks. // Round-67 audit rust-MED-6. return Some(items); } } in_entry = false; depth = 0; } else { depth = depth.saturating_sub(1); } text_target = None; } Ok(Event::Eof) => break, // Partial-parse on error: return whatever we've already // collected rather than throwing the whole batch away. // A truncated body (EOF mid-stream on a flaky network) // would otherwise silently disappear the channel. // Round-67 audit rust-CRIT-3. Err(e) => { log::warn!("strawcore::rss parse error after {} items: {e}", items.len()); return Some(items); } _ => {} } buf.clear(); } Some(items) } enum TextTarget { VideoId, Title, UploaderName, UploaderUrl, Published, } /// Parse an RFC3339 timestamp (`2026-05-25T15:00:00+00:00`) into "N /// units ago". Drops the timezone offset — YT RSS always serves UTC /// and the granularity is days at most, so a ±14h skew doesn't matter /// for the relative display. /// /// Falls back to the raw string if parsing fails. That keeps the UI /// readable even on a malformed feed (rare). 
fn iso_to_relative(iso: &str) -> String { let secs = match parse_rfc3339_secs(iso) { Some(s) => s, None => return iso.to_string(), }; let now_secs = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.as_secs() as i64) .unwrap_or(0); // A device with a skewed clock can see RSS timestamps as future- // dated. saturating_sub returns 0 → "0 seconds ago" → sorts to // top, which is the LTT/WTYP-recurrence vector. Treat future // dates as "just now" so the relative-string sort behaves and // a single skewed item doesn't pin itself at the top of the // feed. Round-67 audit rust-HIGH-7. if secs > now_secs { return "just now".to_string(); } format_relative(now_secs - secs) } fn parse_rfc3339_secs(s: &str) -> Option { if s.len() < 19 { return None; } let date = s.get(..10)?; let time = s.get(11..19)?; if !s.is_char_boundary(10) || s.as_bytes().get(10) != Some(&b'T') { return None; } let mut date_parts = date.split('-'); let y: i32 = date_parts.next()?.parse().ok()?; let m: u32 = date_parts.next()?.parse().ok()?; let d: u32 = date_parts.next()?.parse().ok()?; let mut time_parts = time.split(':'); let hh: u32 = time_parts.next()?.parse().ok()?; let mm: u32 = time_parts.next()?.parse().ok()?; let ss: u32 = time_parts.next()?.parse().ok()?; // Year clamp BEFORE civil_to_days — out-of-range years overflow // the era arithmetic in debug, wrap in release. A hostile feed // serving year=2147483647 must not produce junk timestamps. // Round-67 audit rust-CRIT-1. if !(YEAR_MIN..=YEAR_MAX).contains(&y) { return None; } if !(1..=12).contains(&m) || !(1..=31).contains(&d) || hh > 23 || mm > 59 || ss > 60 { return None; } let days = civil_to_days(y, m, d); Some(days * 86_400 + hh as i64 * 3_600 + mm as i64 * 60 + ss as i64) } /// Howard Hinnant's days-since-1970-01-01 algorithm. Standard, /// branch-free, handles negative years correctly. Source: chrono /// proposal for C++20. 
fn civil_to_days(y: i32, m: u32, d: u32) -> i64 {
    // Work in a calendar whose year starts on 1 March, so January and
    // February count as the tail of the previous year.
    let y = if m <= 2 { y - 1 } else { y };
    // Index of the 400-year block containing `y`, rounded toward negative
    // infinity so negative years land in the right block.
    let era = if y >= 0 { y / 400 } else { (y - 399) / 400 };
    // Year within the block, 0..=399.
    let yoe = (y - era * 400) as u32;
    // Day within the March-based year. Callers must pass d >= 1; d == 0
    // would underflow the unsigned subtraction.
    let doy = (153 * (if m > 2 { m - 3 } else { m + 9 }) + 2) / 5 + d - 1;
    // Day within the block: whole years plus leap days, minus the century
    // years that are not leap years.
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    // 146_097 is the number of days in a 400-year block; 719_468 shifts
    // the origin so that 1970-01-01 maps to 0.
    era as i64 * 146_097 + doe as i64 - 719_468
}

/// Render an age in seconds as `"N unit(s) ago"`, choosing the largest
/// unit that fits: second, minute, hour, day, week, month (30 days) or
/// year (365 days). Negative ages are treated as 0. The unit is pluralised
/// for every count except exactly 1.
fn format_relative(age_secs: i64) -> String {
    const MINUTE: i64 = 60;
    const HOUR: i64 = 3_600;
    const DAY: i64 = 86_400;
    const WEEK: i64 = 604_800;
    const MONTH: i64 = 2_592_000;
    const YEAR: i64 = 31_536_000;

    fn unit(n: i64, name: &str) -> String {
        format!("{} {}{} ago", n, name, if n == 1 { "" } else { "s" })
    }

    let s = age_secs.max(0);
    if s < MINUTE {
        unit(s, "second")
    } else if s < HOUR {
        unit(s / MINUTE, "minute")
    } else if s < DAY {
        unit(s / HOUR, "hour")
    } else if s < WEEK {
        unit(s / DAY, "day")
    } else if s < MONTH {
        unit(s / WEEK, "week")
    } else if s < YEAR {
        unit(s / MONTH, "month")
    } else {
        unit(s / YEAR, "year")
    }
}

/// Strip the namespace prefix off an XML element name. YouTube's feed
/// is heavily namespaced (`yt:videoId`, `media:thumbnail`) but we only
/// care about the local part — namespace-vs-local distinguishing
/// would just bloat the matcher.
///
/// Returns everything after the last `:`, or the whole name when there is
/// none. Bytes that are not valid UTF-8 yield the empty string.
fn local_name(qualified: &[u8]) -> &str {
    let s = std::str::from_utf8(qualified).unwrap_or("");
    match s.rfind(':') {
        Some(idx) => &s[idx + 1..],
        None => s,
    }
}