vc=66: hybrid feed backfill — RSS-fast + streamInfo-complete

Cobb asked for views + durations back in the subs feed without
giving up the 5-10× RSS speedup vc=56 bought. Hybrid path:

1. Rust wrapper — new enrich_feed_item(video_url) ->
   EnrichedFeedMetadata { view_count, duration_seconds }. Thin
   wrapper around stream_info that discards the heavy play-URL
   payload. Future opt: parse watch-page HTML JSON state directly
   to skip JS deobf entirely. ~150 lines of pluck logic, punted.

2. EnrichmentStore — new SharedPreferences-lite store keyed by
   videoId, value Enrichment(viewCount, durationSeconds,
   fetchedAt). Bound to Settings.cacheTtl for staleness. Hard cap
   5000 entries with oldest-eviction.

3. SubscriptionFeedViewModel — after the RSS refresh paints,
   enrichVisibleItems() fans out enrichFeedItem for the first 30
   items (skipping any already enriched fresh). Bounded at 8 wide
   so we don't hammer YT; each call ~500ms full streamInfo so
   30 items in ~2s. Runs on StrawApp.globalScope so a
   refresh-cancel doesn't kill the in-flight enrichment.
   mergeFromCache overlays the enrichment via .withEnrichment()
   so RSS rows pick up viewCount + durationSeconds the moment
   they land. The Enrichment store's StateFlow.value is read on
   every merge call; the enrichment-complete handler triggers a
   _ui.update that re-merges.

Net behavior: feed paints instantly from RSS (no view/duration),
~2s later the visible top-N populate with full metadata. Cached
forever (or until TTL/cap). Subsequent opens read straight from
EnrichmentStore.

StrawApp.onCreate inits the new store alongside the existing
SP-backed ones.
This commit is contained in:
Kayos 2026-05-26 13:40:26 -07:00
parent 7156208c3c
commit dd151e322d
5 changed files with 236 additions and 3 deletions

View file

@ -55,6 +55,6 @@ const val NEWPIPE_APPLICATION_ID_NEW = "net.newpipe.app"
// vc=19 / 0.1.0-AE — rust pipeline cutover. Extraction via // vc=19 / 0.1.0-AE — rust pipeline cutover. Extraction via
// strawcore-core (Sulkta-Coop/strawcore) via the UniFFI wrapper; no // strawcore-core (Sulkta-Coop/strawcore) via the UniFFI wrapper; no
// NewPipeExtractor in the runtime path. // NewPipeExtractor in the runtime path.
const val STRAW_VERSION_CODE = 65 const val STRAW_VERSION_CODE = 66
const val STRAW_VERSION_NAME = "0.1.0-BY" const val STRAW_VERSION_NAME = "0.1.0-BZ"
const val STRAW_APPLICATION_ID = "com.sulkta.straw" const val STRAW_APPLICATION_ID = "com.sulkta.straw"

View file

@ -25,6 +25,36 @@ const RSS_BASE: &str = "https://www.youtube.com/feeds/videos.xml?channel_id=";
const MAX_CONCURRENT: usize = 50; const MAX_CONCURRENT: usize = 50;
const PER_CHANNEL_TIMEOUT_S: u64 = 8; const PER_CHANNEL_TIMEOUT_S: u64 = 8;
/// Hybrid-backfill metadata: just the two fields RSS doesn't return
/// (view count + duration). Kotlin calls this lazily for visible feed
/// items after the RSS-fed paint to fill in the gaps that
/// channel_feed_rss leaves empty.
///
/// vc=66 — built specifically so the subs feed can show 'N views ·
/// X duration' the way YT does, without paying the full channel_info
/// page-scrape cost on initial paint. The underlying stream_info IS
/// heavier than we'd like (~500ms each, runs JS deobf for play URLs
/// we'll discard) — future opt would be to parse the watch-page HTML
/// JSON state directly for just these two fields. ~100ms savings per
/// call but ~150 lines of HTML/JSON pluck logic. Punted until needed.
#[derive(Debug, Clone, uniffi::Record)]
pub struct EnrichedFeedMetadata {
    /// View count, copied unchanged from the `stream_info` result by
    /// `enrich_feed_item`.
    pub view_count: i64,
    /// Duration in seconds, copied unchanged from the `stream_info`
    /// result by `enrich_feed_item`.
    pub duration_seconds: i64,
}
/// Fetch only the view count and duration for `video_url`.
///
/// Makes sure the runtime is initialized, runs the full
/// `stream_info` extraction, and keeps just the two fields the
/// feed needs; the rest of the extraction result is dropped. Any
/// error from `stream_info` is propagated to the caller.
#[uniffi::export(async_runtime = "tokio")]
pub async fn enrich_feed_item(
    video_url: String,
) -> Result<EnrichedFeedMetadata, StrawcoreError> {
    crate::runtime::ensure_initialized();

    let full = crate::stream::stream_info(video_url).await?;
    let view_count = full.view_count;
    let duration_seconds = full.duration_seconds;

    Ok(EnrichedFeedMetadata {
        view_count,
        duration_seconds,
    })
}
/// Single-channel RSS — Kotlin keeps its per-channel cache + fan-out /// Single-channel RSS — Kotlin keeps its per-channel cache + fan-out
/// (parallelism cranked to 50 in the wrapper). Each call is ~50-150ms /// (parallelism cranked to 50 in the wrapper). Each call is ~50-150ms
/// instead of the ~500ms channelInfo page-scrape, so a 50-sub refresh /// instead of the ~500ms channelInfo page-scrape, so a 50-sub refresh

View file

@ -7,6 +7,7 @@ package com.sulkta.straw
import android.app.Application import android.app.Application
import com.sulkta.straw.data.FeedCache import com.sulkta.straw.data.FeedCache
import com.sulkta.straw.data.FeedEnrichment
import com.sulkta.straw.data.History import com.sulkta.straw.data.History
import com.sulkta.straw.data.Playlists import com.sulkta.straw.data.Playlists
import com.sulkta.straw.data.Resume import com.sulkta.straw.data.Resume
@ -72,6 +73,7 @@ class StrawApp : Application() {
Subscriptions.init(this) Subscriptions.init(this)
Playlists.init(this) Playlists.init(this)
Resume.init(this) Resume.init(this)
FeedEnrichment.init(this)
// vc=36 audit HIGH-R3: FeedCache (~225 KB) + SearchCache // vc=36 audit HIGH-R3: FeedCache (~225 KB) + SearchCache
// (~150 KB) JSON-decode at construction. Stash the // (~150 KB) JSON-decode at construction. Stash the
// applicationContext eagerly (cheap) so `get()` is callable // applicationContext eagerly (cheap) so `get()` is callable

View file

@ -0,0 +1,118 @@
/*
* SPDX-FileCopyrightText: 2026 Sulkta-Coop
* SPDX-License-Identifier: GPL-3.0-or-later
*
* Subs-feed enrichment cache. RSS gives us title/url/thumbnail/date
* fast but no view count or duration. After a feed refresh paints
* from RSS, SubscriptionFeedViewModel fans out lightweight
* uniffi.strawcore.enrichFeedItem() calls for the top visible items
* and stashes the results here. mergeFromCache overlays the
* enrichment onto each StreamItem at render time so the row shows
* 'N views · X duration' once available.
*
* Storage: SharedPreferences-lite, single JSON blob keyed by videoId.
* TTL bound to Settings.cacheTtl so enrichments age out alongside the
* rest of the cache. Hard cap at MAX_ENRICHMENTS to bound disk +
* memory.
*/
package com.sulkta.straw.data
import android.content.Context
import android.content.SharedPreferences
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.updateAndGet
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
/**
 * One cached enrichment record for a single video, stored by
 * [EnrichmentStore] under the video's id.
 *
 * @property viewCount view count as passed to [EnrichmentStore.put].
 * @property durationSeconds duration in seconds as passed to
 *   [EnrichmentStore.put].
 * @property fetchedAt `System.currentTimeMillis()` at the moment the
 *   entry was written; [EnrichmentStore.get] compares it against the
 *   TTL cutoff, and eviction keeps the entries with the largest value.
 */
@Serializable
data class Enrichment(
    val viewCount: Long,
    val durationSeconds: Long,
    val fetchedAt: Long,
)
// Name of the SharedPreferences file that backs the enrichment cache.
private const val PREFS = "straw_feed_enrichment"
// Key of the single JSON string holding the whole videoId -> Enrichment map.
private const val KEY = "enrichments_v1"
/**
 * Hard ceiling on the number of stored enrichments. Once a put()
 * pushes the map past this size, only the newest entries (by
 * fetchedAt) are kept.
 *
 * The original sizing estimate was ~250 KB for the JSON blob at the
 * cap (50 bytes/entry × 5000).
 * NOTE(review): 50 bytes/entry looks low — the three quoted field
 * names alone are ~40 bytes per entry before the key and values — so
 * the blob at the cap is probably larger. Re-measure before relying
 * on the 250 KB figure.
 *
 * This cap is not tied to any user-facing limit; enrichment is
 * "cache", not "user data".
 */
private const val MAX_ENRICHMENTS = 5_000
/**
 * SharedPreferences-backed cache of per-video [Enrichment] records,
 * keyed by videoId. The whole map is stored as one JSON string under
 * [KEY] and mirrored in memory as a [StateFlow] so readers can take a
 * snapshot or observe changes.
 *
 * Writers may call [put] from several coroutines at once; in-memory
 * updates are atomic and disk writes are serialized (see [persist]).
 */
class EnrichmentStore(context: Context) {
    private val sp: SharedPreferences = context.getSharedPreferences(PREFS, Context.MODE_PRIVATE)
    private val json = Json { ignoreUnknownKeys = true }
    private val _entries = MutableStateFlow(load())

    /** Raw snapshot of every stored entry. Not TTL-filtered — use [get] for freshness. */
    val entries: StateFlow<Map<String, Enrichment>> = _entries.asStateFlow()

    // Guards the "read current map -> apply()" sequence in persist().
    private val persistLock = Any()

    /**
     * Return a fresh enrichment for this videoId, or null when missing
     * or aged out per Settings.cacheTtl. Forever-TTL never expires.
     */
    fun get(videoId: String): Enrichment? {
        if (videoId.isBlank()) return null
        val e = _entries.value[videoId] ?: return null
        val ttl = Settings.get().cacheTtl.value
        if (ttl.isForever) return e
        val cutoff = System.currentTimeMillis() - ttl.ms
        return if (e.fetchedAt >= cutoff) e else null
    }

    /**
     * Store (or replace) the enrichment for [videoId], stamped with the
     * current time, then persist the map.
     *
     * Blank ids and results where neither field is positive are
     * ignored. A negative value in one field (an "unknown" sentinel)
     * is stored as 0 so it can never be overlaid onto a feed row as a
     * negative count.
     */
    fun put(videoId: String, viewCount: Long, durationSeconds: Long) {
        if (videoId.isBlank()) return
        // Don't write all-zero entries — that's failure not data, and
        // would waste a slot the cap could spend on a real hit.
        if (viewCount <= 0L && durationSeconds <= 0L) return
        val entry = Enrichment(
            viewCount = viewCount.coerceAtLeast(0L),
            durationSeconds = durationSeconds.coerceAtLeast(0L),
            fetchedAt = System.currentTimeMillis(),
        )
        _entries.updateAndGet { current -> capped(current + (videoId to entry)) }
        persist()
    }

    /** Drop every entry, in memory and on disk. */
    fun clear() {
        _entries.value = emptyMap()
        persist()
    }

    /** Keep only the [MAX_ENRICHMENTS] newest entries (by fetchedAt) when over the cap. */
    private fun capped(map: Map<String, Enrichment>): Map<String, Enrichment> {
        if (map.size <= MAX_ENRICHMENTS) return map
        return map.entries
            .sortedByDescending { it.value.fetchedAt }
            .take(MAX_ENRICHMENTS)
            .associate { it.key to it.value }
    }

    /**
     * Write the current in-memory map to SharedPreferences.
     *
     * Always serializes the latest `_entries.value` while holding
     * [persistLock], rather than a snapshot captured by the caller.
     * With concurrent put() calls, a caller-captured snapshot could be
     * applied after a newer one and drop entries from disk; reading
     * the live value under the lock means the last write queued always
     * reflects the newest state.
     */
    private fun persist() {
        synchronized(persistLock) {
            sp.edit().putString(KEY, json.encodeToString(_entries.value)).apply()
        }
    }

    /** Decode the persisted blob; any missing or undecodable value yields an empty map. */
    private fun load(): Map<String, Enrichment> = runCatching {
        val s = sp.getString(KEY, null) ?: return emptyMap()
        json.decodeFromString<Map<String, Enrichment>>(s)
    }.getOrDefault(emptyMap())
}
/**
 * Process-wide holder for the single [EnrichmentStore]. [init] must
 * run before the first [get]; repeated [init] calls are no-ops.
 */
object FeedEnrichment {
    @Volatile private var instance: EnrichmentStore? = null

    /** Create the store from the application context unless one already exists. */
    fun init(context: Context) {
        if (instance != null) return
        synchronized(this) {
            if (instance == null) {
                instance = EnrichmentStore(context.applicationContext)
            }
        }
    }

    /** Return the store, failing with IllegalStateException when [init] has not run. */
    fun get(): EnrichmentStore {
        val store = instance
        return store ?: error("EnrichmentStore not initialized — call FeedEnrichment.init(context)")
    }
}

View file

@ -19,8 +19,10 @@ package com.sulkta.straw.feature.feed
import androidx.lifecycle.ViewModel import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope import androidx.lifecycle.viewModelScope
import com.sulkta.straw.data.ChannelRef import com.sulkta.straw.data.ChannelRef
import com.sulkta.straw.data.Enrichment
import com.sulkta.straw.data.FeedCache import com.sulkta.straw.data.FeedCache
import com.sulkta.straw.data.FeedCacheEntry import com.sulkta.straw.data.FeedCacheEntry
import com.sulkta.straw.data.FeedEnrichment
import com.sulkta.straw.data.Settings import com.sulkta.straw.data.Settings
import com.sulkta.straw.data.Subscriptions import com.sulkta.straw.data.Subscriptions
import com.sulkta.straw.feature.search.StreamItem import com.sulkta.straw.feature.search.StreamItem
@ -191,13 +193,19 @@ class SubscriptionFeedViewModel : ViewModel() {
.awaitAll() .awaitAll()
} }
pruneCacheToSubs(channels) pruneCacheToSubs(channels)
val freshItems = mergeFromCache(channels)
_ui.update { _ui.update {
SubscriptionFeedUiState( SubscriptionFeedUiState(
loading = false, loading = false,
items = mergeFromCache(channels), items = freshItems,
lastFetchedAt = System.currentTimeMillis(), lastFetchedAt = System.currentTimeMillis(),
) )
} }
// vc=66 — hybrid backfill. RSS-fed items have
// viewCount=0 + durationSeconds=0; kick a bounded
// background job that calls enrichFeedItem for the
// top items and pumps a fresh _ui emit when done.
enrichVisibleItems(freshItems)
// Persist what we just freshened. Off the main thread — // Persist what we just freshened. Off the main thread —
// JSON encode on 30 subs * 30 items is small but not // JSON encode on 30 subs * 30 items is small but not
// free, and SharedPreferences.apply is async anyway. // free, and SharedPreferences.apply is async anyway.
@ -283,7 +291,15 @@ class SubscriptionFeedViewModel : ViewModel() {
// Pre-compute recencyScore once per item — vc=35 audit // Pre-compute recencyScore once per item — vc=35 audit
// MED-Q15: sortedWith's comparator was invoking the regex // MED-Q15: sortedWith's comparator was invoking the regex
// twice per pair, so ~1800 regex matches on a 900-item merge. // twice per pair, so ~1800 regex matches on a 900-item merge.
//
// vc=66 — overlay FeedEnrichment data on each item so RSS-fed
// rows (viewCount=0, durationSeconds=0) get backfilled with
// metadata fetched by the background enrichment job below.
// Pure read of the enrichment store; the enrichment write
// path triggers a fresh _ui emit.
val enrichments = FeedEnrichment.get().entries.value
return channels.flatMap { ch -> channelCache[ch.url]?.items.orEmpty() } return channels.flatMap { ch -> channelCache[ch.url]?.items.orEmpty() }
.map { it.withEnrichment(enrichments) }
.map { it to it.recencyScore() } .map { it to it.recencyScore() }
.sortedWith( .sortedWith(
compareByDescending<Pair<StreamItem, Long>> { it.second } compareByDescending<Pair<StreamItem, Long>> { it.second }
@ -293,6 +309,73 @@ class SubscriptionFeedViewModel : ViewModel() {
.map { it.first } .map { it.first }
} }
/**
 * Background enrichment: pulls viewCount + durationSeconds for the
 * top-N freshly-merged items via uniffi.strawcore.enrichFeedItem.
 * Bounded parallel (ENRICH_PARALLELISM wide) — each call is ~500ms
 * of full streamInfo, so 30 items complete in ~2s.
 *
 * Candidate selection is driven by the enrichment store, not only by
 * the item's own fields: [items] already carry the enrichment overlay,
 * and that overlay is applied from the raw store map without a TTL
 * check. An item whose stored entry has expired therefore looks fully
 * populated; filtering on its fields alone would mean expired entries
 * are never refreshed. Per item:
 *  - fresh store hit (per Settings.cacheTtl)   -> skip
 *  - store has an entry but it is not fresh    -> refetch
 *  - no store entry and both fields are empty  -> fetch
 *  - no store entry and the source had data    -> skip
 *
 * Runs OFF viewModelScope so a refresh-cancel doesn't kill an
 * enrichment that's almost done — the background fill is also for
 * NEXT-open paint, no rush. Uses StrawApp.globalScope.
 */
private fun enrichVisibleItems(items: List<StreamItem>) {
    val store = FeedEnrichment.get()
    val known = store.entries.value
    val candidates = items.take(ENRICH_HEAD_COUNT).mapNotNull { item ->
        val videoId = com.sulkta.straw.feature.detail.extractYtVideoId(item.url)
            ?: return@mapNotNull null
        val needsFetch = when {
            store.get(videoId) != null -> false
            // Entry exists but is past TTL; the overlay made the row look populated.
            videoId in known -> true
            else -> item.viewCount <= 0L && item.durationSeconds <= 0L
        }
        if (needsFetch) videoId to item else null
    }
    if (candidates.isEmpty()) return
    com.sulkta.straw.StrawApp.globalScope.launch {
        val gate = Semaphore(ENRICH_PARALLELISM)
        coroutineScope {
            candidates.map { (videoId, item) ->
                async {
                    gate.withPermit {
                        // Re-check under the permit: an overlapping pass may
                        // have filled this id while we were queued.
                        if (store.get(videoId) != null) return@withPermit
                        val md = runCatchingCancellable {
                            withContext(Dispatchers.IO) {
                                uniffi.strawcore.enrichFeedItem(item.url)
                            }
                        }.getOrNull() ?: return@withPermit
                        store.put(
                            videoId,
                            md.viewCount,
                            md.durationSeconds,
                        )
                    }
                }
            }.awaitAll()
        }
        // Nothing was written (all fetches failed or were skipped):
        // the store map is still the same instance, so a re-merge
        // would produce the same rows.
        if (store.entries.value === known) return@launch
        // Pump a fresh emit so the UI picks up the overlay.
        withContext(Dispatchers.Main) {
            val channels = Subscriptions.get().subs.value
            _ui.update { it.copy(items = mergeFromCache(channels)) }
        }
    }
}
// How many items from the head of the merged feed enrichVisibleItems
// considers per pass (it applies items.take(ENRICH_HEAD_COUNT)).
private val ENRICH_HEAD_COUNT = 30
// Permit count of the Semaphore in enrichVisibleItems, i.e. the
// maximum number of enrichFeedItem calls in flight at once.
private val ENRICH_PARALLELISM = 8
/**
 * Overlay cached enrichment onto this item. A field is taken from the
 * enrichment only when the item's own value is not positive; values
 * the source already supplied are never overwritten. Returns the
 * receiver unchanged when nothing is missing, when no video id can be
 * extracted from the url, or when [enrichments] has no entry for it.
 */
private fun StreamItem.withEnrichment(
    enrichments: Map<String, Enrichment>,
): StreamItem {
    val needsViews = viewCount <= 0L
    val needsDuration = durationSeconds <= 0L
    if (!needsViews && !needsDuration) return this

    val overlay = com.sulkta.straw.feature.detail.extractYtVideoId(url)
        ?.let { id -> enrichments[id] }
        ?: return this

    return copy(
        viewCount = if (needsViews) overlay.viewCount else viewCount,
        durationSeconds = if (needsDuration) overlay.durationSeconds else durationSeconds,
    )
}
/** /**
* Clear in-memory cache. Called from Settings when the user flips * Clear in-memory cache. Called from Settings when the user flips
* off the local-cache toggle disk wipe via FeedCacheStore.clear() * off the local-cache toggle disk wipe via FeedCacheStore.clear()