vc=56: subs feed via RSS (5-10x faster) + hide-shorts filter

Strawcore — new channel_feed_rss(channel_url) and subscription_feed
(bulk fan-out, up to 50 concurrent fetches via futures'
buffer_unordered on the tokio runtime). Both fetch the YouTube
Atom RSS at /feeds/videos.xml?channel_id=UCxxx. Each call is
~50-150ms vs ~500ms for the InnerTube channel_info page-scrape.
Deps added to strawcore wrapper Cargo.toml: reqwest (rustls-tls),
quick-xml, futures. reqwest dedupes against strawcore-core's
existing reqwest dep.

App — SubscriptionFeedViewModel.fetchChannelInto swapped to
channel_feed_rss. Parallelism cranked 12 -> 50 since each fetch is
lightweight now. perChannelMax dropped 30 -> 15 (the RSS upstream
cap is 15). RSS doesn't carry duration / viewCount / avatar — those
backfill on tap-through via the existing streamInfo path. Avatar
opportunistic-refresh dropped from this path (lazy-load on
ChannelScreen open is enough).

Hide-shorts content filter — new util/ContentFilter.kt with
looksLikeShort() (URL /shorts/ match OR title contains
'#shorts'/'#short'). Settings toggle defaults off. Filter applies
at row-emit in SubsPane, SearchScreen, ChannelScreen. Paid +
age-restricted stubs in place for vc=57 when strawcore-core gets
the flags.

Expected refresh time on 50 subs: ~30s sequential -> ~1s parallel-50
RSS.
This commit is contained in:
Kayos 2026-05-26 10:44:06 -07:00
parent ccd24c4ed3
commit 341261584a
10 changed files with 421 additions and 27 deletions

265
rust/strawcore/src/feed.rs Normal file
View file

@ -0,0 +1,265 @@
// vc=56 — fast subscription feed via YouTube's per-channel RSS endpoint.
//
// YouTube serves `https://www.youtube.com/feeds/videos.xml?channel_id=UCxxx`
// — small Atom XML, no auth, no JS, no InnerTube round-trip. Replaces the
// per-channel `channel_info()` page-scrape that was costing ~500ms each
// (the bottleneck behind NewPipe's "pull to refresh takes 30 seconds for
// 50 subs" UX). Fan-out 50× concurrent via `futures::stream::buffer_unordered`
// turns a 50-sub refresh from ~5-8s parallel-12 to ~1s parallel-50.
//
// RSS is intentionally lossy — it returns title/url/published/thumbnail
// only. No duration, no view count, no shorts/age/paid flags. That's the
// right trade for a feed-refresh use case: tap-through still goes through
// the full stream_info path to fetch the rich metadata when actually
// needed.
use std::time::Duration;
use futures::stream::{self, StreamExt};
use reqwest::Client;
use crate::error::StrawcoreError;
use crate::search::SearchItem;
/// URL prefix of YouTube's per-channel feed endpoint. The channel ID is
/// appended directly after `channel_id=` to form the request URL.
const RSS_BASE: &str = "https://www.youtube.com/feeds/videos.xml?channel_id=";
/// Maximum number of channel fetches kept in flight at once by
/// `subscription_feed` (the argument to `buffer_unordered`).
const MAX_CONCURRENT: usize = 50;
/// Request timeout, in seconds, configured on the HTTP client used for
/// feed fetches.
const PER_CHANNEL_TIMEOUT_S: u64 = 8;
/// Fetch the RSS feed of a single channel and return its entries.
///
/// Intended for callers that run their own per-channel fan-out and
/// caching (the original notes describe the Kotlin wrapper doing so with
/// parallelism 50). One call costs roughly ~50-150ms instead of the
/// ~500ms channelInfo page-scrape, per the author's measurements.
///
/// # Errors
///
/// Returns `StrawcoreError::Extractor` only when the HTTP client cannot
/// be built. A failed fetch, an unrecognised `channel_url`, or an
/// unparsable feed all yield `Ok` with an empty list.
#[uniffi::export(async_runtime = "tokio")]
pub async fn channel_feed_rss(channel_url: String) -> Result<Vec<SearchItem>, StrawcoreError> {
    crate::runtime::ensure_initialized();
    log::info!("strawcore::channel_feed_rss url_len={}", channel_url.len());

    let request_timeout = Duration::from_secs(PER_CHANNEL_TIMEOUT_S);
    let http = match Client::builder()
        .user_agent("Mozilla/5.0 (Android; Mobile; Straw/0.1)")
        .timeout(request_timeout)
        .build()
    {
        Ok(built) => built,
        Err(e) => {
            return Err(StrawcoreError::Extractor {
                msg: format!("http client build: {e}"),
            });
        }
    };

    // Best-effort: any fetch/parse failure collapses to "no videos".
    let videos = match fetch_channel_rss(&http, &channel_url).await {
        Some(found) => found,
        None => Vec::new(),
    };
    Ok(videos)
}
/// Fetch the RSS feeds of many channels concurrently and merge them into
/// one list, newest first.
///
/// Up to `MAX_CONCURRENT` fetches run at once. Channels whose fetch or
/// parse fails contribute no items rather than failing the whole call.
/// The original notes say the Android app does not call this yet and
/// that it exists for other front-ends or a background prefetch.
///
/// # Errors
///
/// Returns `StrawcoreError::Extractor` only when the HTTP client cannot
/// be built. An empty `channel_urls` short-circuits to an empty list
/// before any client is created.
#[uniffi::export(async_runtime = "tokio")]
pub async fn subscription_feed(
    channel_urls: Vec<String>,
) -> Result<Vec<SearchItem>, StrawcoreError> {
    crate::runtime::ensure_initialized();
    log::info!("strawcore::subscription_feed channels={}", channel_urls.len());

    if channel_urls.is_empty() {
        return Ok(Vec::new());
    }

    let request_timeout = Duration::from_secs(PER_CHANNEL_TIMEOUT_S);
    let http = match Client::builder()
        .user_agent("Mozilla/5.0 (Android; Mobile; Straw/0.1)")
        .timeout(request_timeout)
        .build()
    {
        Ok(built) => built,
        Err(e) => {
            return Err(StrawcoreError::Extractor {
                msg: format!("http client build: {e}"),
            });
        }
    };

    // One future per channel; each owns a cheap clone of the client handle.
    let per_channel = stream::iter(channel_urls)
        .map(|url| {
            let http = http.clone();
            async move {
                match fetch_channel_rss(&http, &url).await {
                    Some(videos) => videos,
                    None => Vec::new(),
                }
            }
        })
        .buffer_unordered(MAX_CONCURRENT)
        .collect::<Vec<Vec<SearchItem>>>()
        .await;

    let total: usize = per_channel.iter().map(Vec::len).sum();
    let mut merged: Vec<SearchItem> = Vec::with_capacity(total);
    for videos in per_channel {
        merged.extend(videos);
    }

    // `upload_date_relative` carries the feed's published timestamp string
    // (set at parse time), so a descending string sort interleaves the
    // channels newest-first. Within a channel the feed is already ordered.
    merged.sort_by(|lhs, rhs| rhs.upload_date_relative.cmp(&lhs.upload_date_relative));
    Ok(merged)
}
/// Download and parse the RSS feed for one channel.
///
/// Returns `None` when the channel ID cannot be extracted from
/// `channel_url`, when the request fails or answers with an HTTP error
/// status, when the body cannot be read as text, or when parsing fails.
async fn fetch_channel_rss(client: &Client, channel_url: &str) -> Option<Vec<SearchItem>> {
    let channel_id = extract_channel_id(channel_url)?;
    let feed_url = format!("{RSS_BASE}{channel_id}");

    let response = client.get(&feed_url).send().await.ok()?;
    // Treat 4xx/5xx as "no feed" instead of trying to parse an error page.
    let response = response.error_for_status().ok()?;
    let xml = response.text().await.ok()?;

    parse_rss(&xml, channel_id)
}
/// Extract the `UCxxx` channel ID from a channel URL or a raw ID.
///
/// Accepted shapes (scheme `https://` or `http://`; host
/// `www.youtube.com`, `youtube.com` or `m.youtube.com`):
///   * `https://www.youtube.com/channel/UCxxx...` (extra path segments,
///     `?query` and `#fragment` are ignored)
///   * `https://www.youtube.com/UCxxx...` (ID directly under the host)
///   * raw `UCxxx...` (already an ID, 22-26 bytes long)
///
/// The returned ID is always non-empty and made only of ASCII letters,
/// digits, `_` and `-`, so it can be appended to the feed URL without
/// further escaping. Anything else yields `None`.
///
/// `@handle` URLs are NOT supported here — RSS requires the channel ID.
/// Callers that only have an @handle should resolve via channel_info()
/// once, cache the ID into Subscriptions, and pass the ID forever after.
fn extract_channel_id(input: &str) -> Option<String> {
    const HOSTS: [&str; 3] = ["www.youtube.com", "youtube.com", "m.youtube.com"];

    // Shape test for a bare ID: `UC` prefix and a plausible length.
    fn looks_like_channel_id(s: &str) -> bool {
        s.starts_with("UC") && (22..=26).contains(&s.len())
    }

    // Only characters that are safe to splice into the feed query string.
    fn is_id_charset(s: &str) -> bool {
        !s.is_empty()
            && s.bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-')
    }

    let trimmed = input.trim();
    if looks_like_channel_id(trimmed) && is_id_charset(trimmed) {
        return Some(trimmed.to_string());
    }

    let rest = trimmed
        .strip_prefix("https://")
        .or_else(|| trimmed.strip_prefix("http://"))?;
    let (host, path) = rest.split_once('/')?;
    if !HOSTS.contains(&host) {
        return None;
    }

    // Drop `?query` / `#fragment` so they never end up inside the ID.
    let path = path
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or("");

    let mut segments = path.split('/');
    let first = segments.next()?;
    let candidate = if first == "channel" {
        segments.next()?
    } else if looks_like_channel_id(first) {
        first
    } else {
        return None;
    };

    is_id_charset(candidate).then(|| candidate.to_string())
}
fn parse_rss(body: &str, channel_id: String) -> Option<Vec<SearchItem>> {
use quick_xml::events::Event;
use quick_xml::Reader;
let mut reader = Reader::from_str(body);
reader.config_mut().trim_text(true);
let mut buf = Vec::new();
let mut items: Vec<SearchItem> = Vec::new();
// Per-entry scratch.
let mut in_entry = false;
let mut depth = 0u8;
let mut video_id = String::new();
let mut title = String::new();
let mut uploader = String::new();
let mut uploader_url = String::new();
let mut thumbnail: Option<String> = None;
let mut published = String::new();
// What text-collecting state we're in. Replaced per element open.
let mut text_target: Option<TextTarget> = None;
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => {
let local = local_name(e.name().as_ref());
if local == "entry" {
in_entry = true;
depth = 0;
video_id.clear();
title.clear();
uploader.clear();
uploader_url.clear();
thumbnail = None;
published.clear();
}
if !in_entry {
continue;
}
depth = depth.saturating_add(1);
text_target = match local {
"videoId" => Some(TextTarget::VideoId),
"title" if depth <= 2 => Some(TextTarget::Title),
"name" => Some(TextTarget::UploaderName),
"uri" => Some(TextTarget::UploaderUrl),
"published" => Some(TextTarget::Published),
_ => None,
};
}
Ok(Event::Empty(e)) => {
if !in_entry {
continue;
}
let local = local_name(e.name().as_ref());
// <media:thumbnail url="..."/> is self-closing.
if local == "thumbnail" {
for attr in e.attributes().flatten() {
if attr.key.as_ref() == b"url" {
if let Ok(v) = attr.unescape_value() {
thumbnail = Some(v.into_owned());
}
}
}
}
}
Ok(Event::Text(t)) => {
if !in_entry {
continue;
}
let Ok(s) = t.unescape() else { continue };
let s = s.as_ref();
match text_target {
Some(TextTarget::VideoId) => video_id.push_str(s),
Some(TextTarget::Title) => title.push_str(s),
Some(TextTarget::UploaderName) => uploader.push_str(s),
Some(TextTarget::UploaderUrl) => uploader_url.push_str(s),
Some(TextTarget::Published) => published.push_str(s),
None => {}
}
}
Ok(Event::End(e)) => {
if !in_entry {
continue;
}
let local = local_name(e.name().as_ref());
if local == "entry" {
if !video_id.is_empty() {
items.push(SearchItem {
url: format!("https://www.youtube.com/watch?v={video_id}"),
title: title.clone(),
uploader: uploader.clone(),
uploader_url: if uploader_url.is_empty() {
Some(format!("https://www.youtube.com/channel/{channel_id}"))
} else {
Some(uploader_url.clone())
},
thumbnail: thumbnail.clone(),
duration_seconds: 0,
view_count: 0,
// RSS gives ISO-8601 timestamps. We pass them
// through unchanged — newer-first sorting on
// raw ISO strings is correct.
upload_date_relative: published.clone(),
});
}
in_entry = false;
depth = 0;
} else {
depth = depth.saturating_sub(1);
}
text_target = None;
}
Ok(Event::Eof) => break,
Err(_) => return None,
_ => {}
}
buf.clear();
}
Some(items)
}
/// Which per-entry field the next XML text node should be appended to
/// while parsing a feed entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextTarget {
    /// Text of a `videoId` element.
    VideoId,
    /// Text of the entry-level `title` element.
    Title,
    /// Text of a `name` element (the uploader's display name).
    UploaderName,
    /// Text of a `uri` element (the uploader's channel URL).
    UploaderUrl,
    /// Text of a `published` element.
    Published,
}
/// Return the local part of a possibly prefixed XML element name.
///
/// Everything up to and including the last `:` is discarded, so
/// `yt:videoId` becomes `videoId` and `media:thumbnail` becomes
/// `thumbnail`; names without a prefix are returned unchanged. The feed
/// parser only ever matches on the local part. Bytes that are not valid
/// UTF-8 produce an empty string.
fn local_name(qualified: &[u8]) -> &str {
    let Ok(name) = std::str::from_utf8(qualified) else {
        return "";
    };
    name.rsplit_once(':').map_or(name, |(_prefix, local)| local)
}

View file

@ -12,6 +12,7 @@ use std::sync::Once;
mod channel;
mod error;
mod feed;
mod runtime;
mod search;
mod stream;