vc=56: subs feed via RSS (5-10x faster) + hide-shorts filter
Strawcore — new channel_feed_rss(channel_url) and subscription_feed (bulk fan-out 50x via futures' buffer_unordered on the tokio runtime). Fetches the YouTube Atom RSS at /feeds/videos.xml?channel_id=UCxxx. Each call is ~50-150ms vs ~500ms for the InnerTube channel_info page-scrape. Deps added to strawcore wrapper Cargo.toml: reqwest (rustls-tls), quick-xml, futures. reqwest dedupes against strawcore-core's existing reqwest dep. App — SubscriptionFeedViewModel.fetchChannelInto swapped to channel_feed_rss. Parallelism cranked 12 -> 50 since each fetch is lightweight now. perChannelMax dropped 30 -> 15 (the RSS upstream cap is 15). RSS doesn't carry duration / viewCount / avatar — those backfill on tap-through via the existing streamInfo path. Avatar opportunistic-refresh dropped from this path (lazy-load on ChannelScreen open is enough). Hide-shorts content filter — new util/ContentFilter.kt with looksLikeShort() (URL /shorts/ match OR title contains '#shorts'/'#short'). Settings toggle defaults off. Filter applies at row-emit in SubsPane, SearchScreen, ChannelScreen. Paid + age-restricted stubs in place for vc=57 when strawcore-core gets the flags. Expected refresh time on 50 subs: ~30s sequential -> ~1s parallel-50 RSS.
This commit is contained in:
parent
ccd24c4ed3
commit
341261584a
10 changed files with 421 additions and 27 deletions
265
rust/strawcore/src/feed.rs
Normal file
265
rust/strawcore/src/feed.rs
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
// vc=56 — fast subscription feed via YouTube's per-channel RSS endpoint.
|
||||
//
|
||||
// YouTube serves `https://www.youtube.com/feeds/videos.xml?channel_id=UCxxx`
|
||||
// — small Atom XML, no auth, no JS, no InnerTube round-trip. Replaces the
|
||||
// per-channel `channel_info()` page-scrape that was costing ~500ms each
|
||||
// (the bottleneck behind NewPipe's "pull to refresh takes 30 seconds for
|
||||
// 50 subs" UX). Fan-out 50× concurrent via `futures::stream::buffer_unordered`
|
||||
// turns a 50-sub refresh from ~5-8s parallel-12 to ~1s parallel-50.
|
||||
//
|
||||
// RSS is intentionally lossy — it returns title/url/published/thumbnail
|
||||
// only. No duration, no view count, no shorts/age/paid flags. That's the
|
||||
// right trade for a feed-refresh use case: tap-through still goes through
|
||||
// the full stream_info path to fetch the rich metadata when actually
|
||||
// needed.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::stream::{self, StreamExt};
|
||||
use reqwest::Client;
|
||||
|
||||
use crate::error::StrawcoreError;
|
||||
use crate::search::SearchItem;
|
||||
|
||||
/// URL prefix of the per-channel feed; the channel ID is appended verbatim.
const RSS_BASE: &str = "https://www.youtube.com/feeds/videos.xml?channel_id=";

/// Upper bound on in-flight channel fetches in `subscription_feed`.
const MAX_CONCURRENT: usize = 50;

/// Timeout, in seconds, configured on the HTTP client used for feed requests.
const PER_CHANNEL_TIMEOUT_S: u64 = 8;
|
||||
|
||||
/// Single-channel RSS — Kotlin keeps its per-channel cache + fan-out
|
||||
/// (parallelism cranked to 50 in the wrapper). Each call is ~50-150ms
|
||||
/// instead of the ~500ms channelInfo page-scrape, so a 50-sub refresh
|
||||
/// drops from ~5-8s to ~1s.
|
||||
#[uniffi::export(async_runtime = "tokio")]
|
||||
pub async fn channel_feed_rss(
|
||||
channel_url: String,
|
||||
) -> Result<Vec<SearchItem>, StrawcoreError> {
|
||||
crate::runtime::ensure_initialized();
|
||||
log::info!("strawcore::channel_feed_rss url_len={}", channel_url.len());
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(PER_CHANNEL_TIMEOUT_S))
|
||||
.user_agent("Mozilla/5.0 (Android; Mobile; Straw/0.1)")
|
||||
.build()
|
||||
.map_err(|e| StrawcoreError::Extractor {
|
||||
msg: format!("http client build: {e}"),
|
||||
})?;
|
||||
Ok(fetch_channel_rss(&client, &channel_url).await.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Bulk subscription feed fan-out — for callers that want one round-trip
|
||||
/// to Rust. Currently unused by the Android app (it sticks with the
|
||||
/// per-channel cache), but exposed for future desktop / web variants
|
||||
/// or for a "warm everything" background prefetch.
|
||||
#[uniffi::export(async_runtime = "tokio")]
|
||||
pub async fn subscription_feed(
|
||||
channel_urls: Vec<String>,
|
||||
) -> Result<Vec<SearchItem>, StrawcoreError> {
|
||||
crate::runtime::ensure_initialized();
|
||||
log::info!("strawcore::subscription_feed channels={}", channel_urls.len());
|
||||
if channel_urls.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(PER_CHANNEL_TIMEOUT_S))
|
||||
.user_agent("Mozilla/5.0 (Android; Mobile; Straw/0.1)")
|
||||
.build()
|
||||
.map_err(|e| StrawcoreError::Extractor {
|
||||
msg: format!("http client build: {e}"),
|
||||
})?;
|
||||
|
||||
let results: Vec<Vec<SearchItem>> = stream::iter(channel_urls.into_iter())
|
||||
.map(|url| {
|
||||
let client = client.clone();
|
||||
async move { fetch_channel_rss(&client, &url).await.unwrap_or_default() }
|
||||
})
|
||||
.buffer_unordered(MAX_CONCURRENT)
|
||||
.collect()
|
||||
.await;
|
||||
|
||||
let mut flat: Vec<SearchItem> = results.into_iter().flatten().collect();
|
||||
// Newest first by published timestamp baked into the upload_date_relative
|
||||
// field at parse time — RSS already returns entries newest-first per
|
||||
// channel so we mostly just need cross-channel interleave.
|
||||
flat.sort_by(|a, b| b.upload_date_relative.cmp(&a.upload_date_relative));
|
||||
Ok(flat)
|
||||
}
|
||||
|
||||
async fn fetch_channel_rss(client: &Client, channel_url: &str) -> Option<Vec<SearchItem>> {
|
||||
let channel_id = extract_channel_id(channel_url)?;
|
||||
let url = format!("{RSS_BASE}{channel_id}");
|
||||
let body = client
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.ok()?
|
||||
.error_for_status()
|
||||
.ok()?
|
||||
.text()
|
||||
.await
|
||||
.ok()?;
|
||||
parse_rss(&body, channel_id)
|
||||
}
|
||||
|
||||
/// Extract the `UCxxx` channel ID from a channel URL or a raw ID.
///
/// Accepted shapes (scheme `https` or `http`; host `www.youtube.com`,
/// `youtube.com` or `m.youtube.com`):
///   * `https://www.youtube.com/channel/UCxxx...`, optionally followed by
///     a sub-path (`/videos`), a query string or a fragment
///   * `https://www.youtube.com/UCxxx...` (ID directly under the host)
///   * raw `UCxxx...` (already an ID)
///
/// Returns `None` for anything else, for an empty ID, and for an ID that
/// contains characters other than ASCII letters, digits, `-` and `_`.
/// The ID is later appended to a URL, so it must not smuggle in extra
/// query parameters.
///
/// `@handle` URLs are NOT supported here — RSS requires the channel ID.
/// Callers that only have an @handle should resolve via channel_info()
/// once, cache the ID into Subscriptions, and pass the ID forever after.
fn extract_channel_id(input: &str) -> Option<String> {
    const HOSTS: [&str; 3] = ["www.youtube.com", "youtube.com", "m.youtube.com"];

    // Heuristic for a bare ID: `UC` prefix and 22–26 bytes long.
    fn looks_like_raw_id(s: &str) -> bool {
        s.starts_with("UC") && (22..=26).contains(&s.len())
    }

    let trimmed = input.trim();
    let after_scheme = trimmed
        .strip_prefix("https://")
        .or_else(|| trimmed.strip_prefix("http://"));

    let candidate = match after_scheme {
        Some(rest) => {
            let (host, tail) = rest.split_once('/')?;
            if !HOSTS.contains(&host) {
                return None;
            }
            // Query string and fragment are never part of the ID.
            let path = tail.split(|c| c == '?' || c == '#').next()?;
            match path.strip_prefix("channel/") {
                Some(after) => after.split('/').next()?,
                None => {
                    let first = path.split('/').next()?;
                    if !looks_like_raw_id(first) {
                        return None;
                    }
                    first
                }
            }
        }
        None => {
            if !looks_like_raw_id(trimmed) {
                return None;
            }
            trimmed
        }
    };

    let well_formed = !candidate.is_empty()
        && candidate
            .bytes()
            .all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_');
    well_formed.then(|| candidate.to_string())
}
|
||||
|
||||
fn parse_rss(body: &str, channel_id: String) -> Option<Vec<SearchItem>> {
|
||||
use quick_xml::events::Event;
|
||||
use quick_xml::Reader;
|
||||
|
||||
let mut reader = Reader::from_str(body);
|
||||
reader.config_mut().trim_text(true);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let mut items: Vec<SearchItem> = Vec::new();
|
||||
|
||||
// Per-entry scratch.
|
||||
let mut in_entry = false;
|
||||
let mut depth = 0u8;
|
||||
let mut video_id = String::new();
|
||||
let mut title = String::new();
|
||||
let mut uploader = String::new();
|
||||
let mut uploader_url = String::new();
|
||||
let mut thumbnail: Option<String> = None;
|
||||
let mut published = String::new();
|
||||
|
||||
// What text-collecting state we're in. Replaced per element open.
|
||||
let mut text_target: Option<TextTarget> = None;
|
||||
|
||||
loop {
|
||||
match reader.read_event_into(&mut buf) {
|
||||
Ok(Event::Start(e)) => {
|
||||
let local = local_name(e.name().as_ref());
|
||||
if local == "entry" {
|
||||
in_entry = true;
|
||||
depth = 0;
|
||||
video_id.clear();
|
||||
title.clear();
|
||||
uploader.clear();
|
||||
uploader_url.clear();
|
||||
thumbnail = None;
|
||||
published.clear();
|
||||
}
|
||||
if !in_entry {
|
||||
continue;
|
||||
}
|
||||
depth = depth.saturating_add(1);
|
||||
text_target = match local {
|
||||
"videoId" => Some(TextTarget::VideoId),
|
||||
"title" if depth <= 2 => Some(TextTarget::Title),
|
||||
"name" => Some(TextTarget::UploaderName),
|
||||
"uri" => Some(TextTarget::UploaderUrl),
|
||||
"published" => Some(TextTarget::Published),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
Ok(Event::Empty(e)) => {
|
||||
if !in_entry {
|
||||
continue;
|
||||
}
|
||||
let local = local_name(e.name().as_ref());
|
||||
// <media:thumbnail url="..."/> is self-closing.
|
||||
if local == "thumbnail" {
|
||||
for attr in e.attributes().flatten() {
|
||||
if attr.key.as_ref() == b"url" {
|
||||
if let Ok(v) = attr.unescape_value() {
|
||||
thumbnail = Some(v.into_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Event::Text(t)) => {
|
||||
if !in_entry {
|
||||
continue;
|
||||
}
|
||||
let Ok(s) = t.unescape() else { continue };
|
||||
let s = s.as_ref();
|
||||
match text_target {
|
||||
Some(TextTarget::VideoId) => video_id.push_str(s),
|
||||
Some(TextTarget::Title) => title.push_str(s),
|
||||
Some(TextTarget::UploaderName) => uploader.push_str(s),
|
||||
Some(TextTarget::UploaderUrl) => uploader_url.push_str(s),
|
||||
Some(TextTarget::Published) => published.push_str(s),
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
Ok(Event::End(e)) => {
|
||||
if !in_entry {
|
||||
continue;
|
||||
}
|
||||
let local = local_name(e.name().as_ref());
|
||||
if local == "entry" {
|
||||
if !video_id.is_empty() {
|
||||
items.push(SearchItem {
|
||||
url: format!("https://www.youtube.com/watch?v={video_id}"),
|
||||
title: title.clone(),
|
||||
uploader: uploader.clone(),
|
||||
uploader_url: if uploader_url.is_empty() {
|
||||
Some(format!("https://www.youtube.com/channel/{channel_id}"))
|
||||
} else {
|
||||
Some(uploader_url.clone())
|
||||
},
|
||||
thumbnail: thumbnail.clone(),
|
||||
duration_seconds: 0,
|
||||
view_count: 0,
|
||||
// RSS gives ISO-8601 timestamps. We pass them
|
||||
// through unchanged — newer-first sorting on
|
||||
// raw ISO strings is correct.
|
||||
upload_date_relative: published.clone(),
|
||||
});
|
||||
}
|
||||
in_entry = false;
|
||||
depth = 0;
|
||||
} else {
|
||||
depth = depth.saturating_sub(1);
|
||||
}
|
||||
text_target = None;
|
||||
}
|
||||
Ok(Event::Eof) => break,
|
||||
Err(_) => return None,
|
||||
_ => {}
|
||||
}
|
||||
buf.clear();
|
||||
}
|
||||
Some(items)
|
||||
}
|
||||
|
||||
/// Which per-entry field the feed parser appends the next text node to.
enum TextTarget {
    /// Text of a `videoId` element.
    VideoId,
    /// Text of a `title` element that is a direct child of the entry.
    Title,
    /// Text of a `name` element.
    UploaderName,
    /// Text of a `uri` element.
    UploaderUrl,
    /// Text of a `published` element.
    Published,
}
|
||||
|
||||
/// Return the part of an XML element name after its last `:`.
///
/// `yt:videoId` becomes `videoId` and `media:thumbnail` becomes
/// `thumbnail`; a name without a prefix is returned unchanged. Bytes that
/// are not valid UTF-8 yield the empty string.
fn local_name(qualified: &[u8]) -> &str {
    let Ok(name) = std::str::from_utf8(qualified) else {
        return "";
    };
    name.rsplit_once(':').map_or(name, |(_prefix, local)| local)
}
|
||||
|
|
@ -12,6 +12,7 @@ use std::sync::Once;
|
|||
|
||||
mod channel;
|
||||
mod error;
|
||||
mod feed;
|
||||
mod runtime;
|
||||
mod search;
|
||||
mod stream;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue