vc=66: hybrid feed backfill — RSS-fast + streamInfo-complete

Cobb asked for views + durations back in the subs feed without
giving up the 5-10× RSS speedup vc=56 bought. Hybrid path:

1. Rust wrapper — new enrich_feed_item(video_url) ->
   EnrichedFeedMetadata { view_count, duration_seconds }. Thin
   wrapper around stream_info that discards the heavy play-URL
   payload. Future opt: parse watch-page HTML JSON state directly
   to skip JS deobf entirely. ~150 lines of pluck logic, punted.

2. EnrichmentStore — new SharedPreferences-lite store keyed by
   videoId, value Enrichment(viewCount, durationSeconds,
   fetchedAt). Bound to Settings.cacheTtl for staleness. Hard cap
   5000 entries with oldest-eviction.

3. SubscriptionFeedViewModel — after the RSS refresh paints,
   enrichVisibleItems() fans out enrichFeedItem for the first 30
   items (skipping any already enriched fresh). Bounded at 8 wide
   so we don't hammer YT; each call ~500ms full streamInfo so
   30 items in ~2s. Runs on StrawApp.globalScope so a
   refresh-cancel doesn't kill the in-flight enrichment.
   mergeFromCache overlays the enrichment via .withEnrichment()
   so RSS rows pick up viewCount + durationSeconds the moment
   they land. The Enrichment store's StateFlow.value is read on
   every merge call; the enrichment-complete handler triggers a
   _ui.update that re-merges.

Net behavior: feed paints instantly from RSS (no view/duration),
~2s later the visible top-N populate with full metadata. Cached
forever (or until TTL/cap). Subsequent opens read straight from
EnrichmentStore.

StrawApp.onCreate inits the new store alongside the existing
SP-backed ones.
This commit is contained in:
Kayos 2026-05-26 13:40:26 -07:00
parent 7156208c3c
commit dd151e322d
5 changed files with 236 additions and 3 deletions

View file

@ -55,6 +55,6 @@ const val NEWPIPE_APPLICATION_ID_NEW = "net.newpipe.app"
// vc=19 / 0.1.0-AE — rust pipeline cutover. Extraction via
// strawcore-core (Sulkta-Coop/strawcore) via the UniFFI wrapper; no
// NewPipeExtractor in the runtime path.
const val STRAW_VERSION_CODE = 65
const val STRAW_VERSION_NAME = "0.1.0-BY"
const val STRAW_VERSION_CODE = 66
const val STRAW_VERSION_NAME = "0.1.0-BZ"
const val STRAW_APPLICATION_ID = "com.sulkta.straw"

View file

@ -25,6 +25,36 @@ const RSS_BASE: &str = "https://www.youtube.com/feeds/videos.xml?channel_id=";
const MAX_CONCURRENT: usize = 50;
const PER_CHANNEL_TIMEOUT_S: u64 = 8;
/// Hybrid-backfill metadata: just the two fields RSS doesn't return
/// (view count + duration). Kotlin calls this lazily for visible feed
/// items after the RSS-fed paint to fill in the gaps that
/// channel_feed_rss leaves empty.
///
/// vc=66 — built specifically so the subs feed can show 'N views ·
/// X duration' the way YT does, without paying the full channel_info
/// page-scrape cost on initial paint. The underlying stream_info IS
/// heavier than we'd like (~500ms each, runs JS deobf for play URLs
/// we'll discard) — future opt would be to parse the watch-page HTML
/// JSON state directly for just these two fields. ~100ms savings per
/// call but ~150 lines of HTML/JSON pluck logic. Punted until needed.
///
/// Exposed across the UniFFI boundary as a record (see the derive below);
/// `enrich_feed_item` is the only constructor visible in this file.
#[derive(Debug, Clone, uniffi::Record)]
pub struct EnrichedFeedMetadata {
/// View count, copied verbatim from the `view_count` field of the
/// `stream_info` result.
pub view_count: i64,
/// Duration in seconds, copied verbatim from the `duration_seconds`
/// field of the `stream_info` result.
pub duration_seconds: i64,
}
/// Fetch the view count and duration for a single video.
///
/// Makes sure the wrapper runtime is initialized, runs the full
/// `stream_info` extraction for `video_url`, and keeps only the two
/// fields the feed needs; everything else in the result is dropped.
///
/// # Errors
///
/// Any error returned by `stream_info` is propagated to the caller
/// (through `?`, so the usual `From` conversion applies).
#[uniffi::export(async_runtime = "tokio")]
pub async fn enrich_feed_item(
    video_url: String,
) -> Result<EnrichedFeedMetadata, StrawcoreError> {
    crate::runtime::ensure_initialized();

    let stream = crate::stream::stream_info(video_url).await?;
    let view_count = stream.view_count;
    let duration_seconds = stream.duration_seconds;

    Ok(EnrichedFeedMetadata {
        view_count,
        duration_seconds,
    })
}
/// Single-channel RSS — Kotlin keeps its per-channel cache + fan-out
/// (parallelism cranked to 50 in the wrapper). Each call is ~50-150ms
/// instead of the ~500ms channelInfo page-scrape, so a 50-sub refresh

View file

@ -7,6 +7,7 @@ package com.sulkta.straw
import android.app.Application
import com.sulkta.straw.data.FeedCache
import com.sulkta.straw.data.FeedEnrichment
import com.sulkta.straw.data.History
import com.sulkta.straw.data.Playlists
import com.sulkta.straw.data.Resume
@ -72,6 +73,7 @@ class StrawApp : Application() {
Subscriptions.init(this)
Playlists.init(this)
Resume.init(this)
FeedEnrichment.init(this)
// vc=36 audit HIGH-R3: FeedCache (~225 KB) + SearchCache
// (~150 KB) JSON-decode at construction. Stash the
// applicationContext eagerly (cheap) so `get()` is callable

View file

@ -0,0 +1,118 @@
/*
* SPDX-FileCopyrightText: 2026 Sulkta-Coop
* SPDX-License-Identifier: GPL-3.0-or-later
*
* Subs-feed enrichment cache. RSS gives us title/url/thumbnail/date
* fast but no view count or duration. After a feed refresh paints
* from RSS, SubscriptionFeedViewModel fans out lightweight
* uniffi.strawcore.enrichFeedItem() calls for the top visible items
* and stashes the results here. mergeFromCache overlays the
* enrichment onto each StreamItem at render time so the row shows
* 'N views · X duration' once available.
*
* Storage: SharedPreferences-lite, single JSON blob keyed by videoId.
* TTL bound to Settings.cacheTtl so enrichments age out alongside the
* rest of the cache. Hard cap at MAX_ENRICHMENTS to bound disk +
* memory.
*/
package com.sulkta.straw.data
import android.content.Context
import android.content.SharedPreferences
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.updateAndGet
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
/**
 * One cached enrichment record for a single video.
 *
 * Serialized (kotlinx.serialization) as a value of the videoId-keyed map that
 * EnrichmentStore persists.
 */
@Serializable
data class Enrichment(
// View count reported by the enrichment call.
val viewCount: Long,
// Duration in seconds reported by the enrichment call.
val durationSeconds: Long,
// System.currentTimeMillis() captured when EnrichmentStore.put created
// this record; EnrichmentStore.get compares it against the TTL cutoff.
val fetchedAt: Long,
)
// Name of the SharedPreferences file opened by EnrichmentStore.
private const val PREFS = "straw_feed_enrichment"
// Key of the single JSON string that holds the whole videoId -> Enrichment map.
private const val KEY = "enrichments_v1"
/**
 * Hard ceiling on the number of stored enrichments. Once a put pushes the
 * map past this size, EnrichmentStore keeps only the MAX_ENRICHMENTS entries
 * with the newest fetchedAt.
 *
 * The original sizing estimate is 50 bytes/entry × 5000 = 250 KB for the
 * JSON blob. The user-facing cap doesn't tie to this; enrichment is "cache"
 * not "user data."
 *
 * NOTE(review): with the field names declared on Enrichment, one serialized
 * entry looks closer to ~85 bytes (quoted key plus three named numeric
 * fields), which would put the blob nearer ~425 KB at the cap — confirm
 * against a real dump before relying on the 250 KB figure.
 */
private const val MAX_ENRICHMENTS = 5_000
/**
 * SharedPreferences-backed cache of per-video [Enrichment] records, keyed by
 * videoId and mirrored in memory as a [StateFlow].
 *
 * The whole map is persisted as one JSON string under [KEY]. In-memory updates
 * are atomic (compare-and-set on the flow); disk writes go through
 * [persistLatest], which always serializes the newest in-memory snapshot while
 * holding a lock. That matters because [put] is called from several coroutines
 * at once: if each caller serialized its own post-update snapshot, a slower
 * caller could hand SharedPreferences an older map last and drop a newer entry
 * from disk.
 */
class EnrichmentStore(context: Context) {
    private val sp: SharedPreferences = context.getSharedPreferences(PREFS, Context.MODE_PRIVATE)
    private val json = Json { ignoreUnknownKeys = true }

    /** Guards the "read newest snapshot, enqueue write" pair in [persistLatest]. */
    private val persistLock = Any()

    private val _entries = MutableStateFlow(load())

    /** Read-only view of every stored enrichment, including ones past their TTL. */
    val entries: StateFlow<Map<String, Enrichment>> = _entries.asStateFlow()

    /**
     * Return a fresh enrichment for this videoId, or null when missing
     * or aged out per Settings.cacheTtl. Forever-TTL never expires.
     */
    fun get(videoId: String): Enrichment? {
        if (videoId.isBlank()) return null
        val e = _entries.value[videoId] ?: return null
        val ttl = Settings.get().cacheTtl.value
        if (ttl.isForever) return e
        val cutoff = System.currentTimeMillis() - ttl.ms
        return if (e.fetchedAt >= cutoff) e else null
    }

    /**
     * Store (or overwrite) the enrichment for [videoId], stamped with the
     * current wall-clock time, then persist the map.
     *
     * Blank ids and all-zero results are ignored. When the map grows past
     * [MAX_ENRICHMENTS], only the entries with the newest fetchedAt are kept.
     */
    fun put(videoId: String, viewCount: Long, durationSeconds: Long) {
        if (videoId.isBlank()) return
        // Don't write all-zero entries — that's failure not data, and
        // would waste a slot the cap could spend on a real hit.
        if (viewCount <= 0L && durationSeconds <= 0L) return
        val entry = Enrichment(
            viewCount = viewCount,
            durationSeconds = durationSeconds,
            fetchedAt = System.currentTimeMillis(),
        )
        // Result intentionally unused: persistLatest() re-reads the flow so the
        // write reflects every update that has landed, not just this one.
        _entries.updateAndGet { current ->
            val withEntry = current + (videoId to entry)
            if (withEntry.size > MAX_ENRICHMENTS) {
                withEntry.entries
                    .sortedByDescending { it.value.fetchedAt }
                    .take(MAX_ENRICHMENTS)
                    .associate { it.key to it.value }
            } else {
                withEntry
            }
        }
        persistLatest()
    }

    /** Drop every enrichment from memory and persist the empty map. */
    fun clear() {
        _entries.value = emptyMap()
        persistLatest()
    }

    /**
     * Serialize the current in-memory map and queue it with apply().
     *
     * Reading the snapshot and calling apply() happen under one lock, so the
     * last apply() issued always carries a snapshot that includes every
     * update made before it. Stale snapshots can no longer overwrite newer
     * ones when callers race.
     */
    private fun persistLatest() {
        synchronized(persistLock) {
            sp.edit().putString(KEY, json.encodeToString(_entries.value)).apply()
        }
    }

    /**
     * Decode the persisted map. A missing key or any decode failure yields an
     * empty map (best-effort: this is a cache, a corrupt blob is discarded).
     */
    private fun load(): Map<String, Enrichment> = runCatching {
        val s = sp.getString(KEY, null) ?: return emptyMap()
        json.decodeFromString<Map<String, Enrichment>>(s)
    }.getOrDefault(emptyMap())
}
/**
 * Process-wide holder for the single [EnrichmentStore].
 *
 * [init] must run once (it is safe to call repeatedly) before [get] is used.
 */
object FeedEnrichment {
    @Volatile private var instance: EnrichmentStore? = null

    /**
     * Create the store on first call, bound to the application context.
     * Later calls are no-ops.
     */
    fun init(context: Context) {
        // Fast path: already created, skip the monitor entirely.
        if (instance != null) return
        synchronized(this) {
            // Re-check under the lock; another thread may have won the race.
            if (instance == null) {
                instance = EnrichmentStore(context.applicationContext)
            }
        }
    }

    /**
     * Return the store.
     *
     * @throws IllegalStateException when [init] has not been called yet.
     */
    fun get(): EnrichmentStore =
        checkNotNull(instance) {
            "EnrichmentStore not initialized — call FeedEnrichment.init(context)"
        }
}

View file

@ -19,8 +19,10 @@ package com.sulkta.straw.feature.feed
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.sulkta.straw.data.ChannelRef
import com.sulkta.straw.data.Enrichment
import com.sulkta.straw.data.FeedCache
import com.sulkta.straw.data.FeedCacheEntry
import com.sulkta.straw.data.FeedEnrichment
import com.sulkta.straw.data.Settings
import com.sulkta.straw.data.Subscriptions
import com.sulkta.straw.feature.search.StreamItem
@ -191,13 +193,19 @@ class SubscriptionFeedViewModel : ViewModel() {
.awaitAll()
}
pruneCacheToSubs(channels)
val freshItems = mergeFromCache(channels)
_ui.update {
SubscriptionFeedUiState(
loading = false,
items = mergeFromCache(channels),
items = freshItems,
lastFetchedAt = System.currentTimeMillis(),
)
}
// vc=66 — hybrid backfill. RSS-fed items have
// viewCount=0 + durationSeconds=0; kick a bounded
// background job that calls enrichFeedItem for the
// top items and pumps a fresh _ui emit when done.
enrichVisibleItems(freshItems)
// Persist what we just freshened. Off the main thread —
// JSON encode on 30 subs * 30 items is small but not
// free, and SharedPreferences.apply is async anyway.
@ -283,7 +291,15 @@ class SubscriptionFeedViewModel : ViewModel() {
// Pre-compute recencyScore once per item — vc=35 audit
// MED-Q15: sortedWith's comparator was invoking the regex
// twice per pair, so ~1800 regex matches on a 900-item merge.
//
// vc=66 — overlay FeedEnrichment data on each item so RSS-fed
// rows (viewCount=0, durationSeconds=0) get backfilled with
// metadata fetched by the background enrichment job below.
// Pure read of the enrichment store; the enrichment write
// path triggers a fresh _ui emit.
val enrichments = FeedEnrichment.get().entries.value
return channels.flatMap { ch -> channelCache[ch.url]?.items.orEmpty() }
.map { it.withEnrichment(enrichments) }
.map { it to it.recencyScore() }
.sortedWith(
compareByDescending<Pair<StreamItem, Long>> { it.second }
@ -293,6 +309,73 @@ class SubscriptionFeedViewModel : ViewModel() {
.map { it.first }
}
/**
 * Background enrichment: pulls viewCount + durationSeconds for the
 * top-N freshly-merged items via the lightweight
 * uniffi.strawcore.enrichFeedItem endpoint. Bounded parallel
 * (8-wide) — each call is ~500ms full streamInfo, so 30 items
 * complete in ~2s.
 *
 * Which items get fetched:
 *  - a fresh FeedEnrichment hit (TTL controlled by Settings.cacheTtl)
 *    is always skipped;
 *  - an item whose stored enrichment has aged out is re-fetched. The
 *    incoming list has already been overlaid by mergeFromCache, which
 *    does not look at the TTL, so the item's own non-zero fields cannot
 *    be used to decide this — they may just be the stale overlay;
 *  - an item with no stored enrichment at all is fetched only when both
 *    of its fields are empty (i.e. the source did not populate them).
 *
 * Runs OFF viewModelScope so a refresh-cancel doesn't kill an
 * enrichment that's almost done — the background fill is for
 * NEXT-open paint, no rush. Uses StrawApp.globalScope.
 */
private fun enrichVisibleItems(items: List<StreamItem>) {
    val store = FeedEnrichment.get()
    // One snapshot for the whole pass, so "has any entry, fresh or stale"
    // is answered consistently while selecting targets.
    val known = store.entries.value
    val targets = items.take(ENRICH_HEAD_COUNT).mapNotNull { item ->
        val videoId = com.sulkta.straw.feature.detail.extractYtVideoId(item.url)
            ?: return@mapNotNull null
        val needsFetch = when {
            store.get(videoId) != null -> false
            videoId in known -> true
            else -> item.viewCount <= 0L && item.durationSeconds <= 0L
        }
        if (needsFetch) videoId to item else null
    }
    if (targets.isEmpty()) return
    com.sulkta.straw.StrawApp.globalScope.launch {
        val gate = Semaphore(ENRICH_PARALLELISM)
        val fetchedAny = coroutineScope {
            targets.map { (videoId, item) ->
                async {
                    gate.withPermit {
                        // Re-check under the permit: an overlapping pass may
                        // have filled this id while we were queued.
                        if (store.get(videoId) != null) return@withPermit false
                        val md = runCatchingCancellable {
                            withContext(Dispatchers.IO) {
                                uniffi.strawcore.enrichFeedItem(item.url)
                            }
                        }.getOrNull() ?: return@withPermit false
                        store.put(
                            videoId,
                            md.viewCount,
                            md.durationSeconds,
                        )
                        true
                    }
                }
            }.awaitAll().any { it }
        }
        // Nothing fetched means the overlay cannot have changed; skip the
        // full re-merge on the main thread.
        if (!fetchedAny) return@launch
        // Pump a fresh emit so the UI picks up the overlay. The merge is
        // computed once, outside update {}, because that lambda can be
        // re-run when the state changes concurrently.
        withContext(Dispatchers.Main) {
            val merged = mergeFromCache(Subscriptions.get().subs.value)
            _ui.update { it.copy(items = merged) }
        }
    }
}
// How many items from the head of the merged feed enrichVisibleItems
// considers per pass.
private val ENRICH_HEAD_COUNT = 30
// Permit count of the Semaphore that bounds concurrent enrichFeedItem calls.
private val ENRICH_PARALLELISM = 8
/**
 * Overlay cached enrichment data onto this item.
 *
 * Only empty (non-positive) fields are filled — a value the source
 * already provided (e.g. from a channelInfo path) is never replaced.
 * Returns the receiver unchanged when both fields are already set,
 * when no video id can be extracted from the url, or when the map has
 * no entry for that id.
 */
private fun StreamItem.withEnrichment(
    enrichments: Map<String, Enrichment>,
): StreamItem {
    val missingViews = viewCount <= 0L
    val missingDuration = durationSeconds <= 0L
    if (!missingViews && !missingDuration) return this

    val overlay = com.sulkta.straw.feature.detail.extractYtVideoId(url)
        ?.let { id -> enrichments[id] }
        ?: return this

    return copy(
        viewCount = if (missingViews) overlay.viewCount else viewCount,
        durationSeconds = if (missingDuration) overlay.durationSeconds else durationSeconds,
    )
}
/**
* Clear in-memory cache. Called from Settings when the user flips
* off the local-cache toggle disk wipe via FeedCacheStore.clear()