strawcore/src/youtube/channel.rs
Kayos d4000a9f9a Cleanup: drop playlist + suggestion + dead client constants + suppress_unused stubs
Round-2 cruft audit punch list — mechanical deletes, no behavior change.

Whole modules deleted (no wrapper consumer):
  * youtube/playlist_extractor.rs (297 LOC) — full playlist extraction
  * youtube/linkhandler/playlist.rs (81 LOC) — playlist URL parser
  * youtube/suggestion_extractor.rs (91 LOC) — search-as-you-type
  * tests/stream_phase4_offline.rs (186 LOC) — tautological test

Dead pub fns + enum variants + constants:
  * WEB_REMIX_* constants (3) + WEB_MUSIC_ANALYTICS_* constants (3)
  * InnertubeClientRequestInfo::of_web_music_analytics_charts_client
    factory + its charts_client_omits_platform_and_screen test
  * SearchFilter::Music{Songs,Videos,Albums,Playlists,Artists} variants
    (5 of 9 cases) + uses_music_endpoint helper + the search_extractor
    'music search not implemented' reject branch
  * Two #[allow(dead_code)] _suppress_unused stub fns and the imports
    they were keeping alive (std::sync::Arc in js/extractor.rs,
    NetworkError in stream_extractor.rs)

Renamed:
  * search_extractor::test_helpers -> renderer_helpers. Mis-named:
    it's production code called from channel.rs, not a test fixture.

potoken/ kept and documented as the designed Phase-5 extension point
for YouTube bot-detection — wrapper's Android side hasn't registered
a real provider yet, but the trait + global slot stay so when YT
forces po_token universally the integration is one Kotlin patch away,
not a Rust-side rewrite.

~580 LOC removed from production. Wrapper does not need to change.
2026-05-26 22:16:11 -07:00

594 lines
23 KiB
Rust

// YoutubeChannelExtractor + helper.resolveChannelId — fetches channel
// info via /youtubei/v1/browse. Mirrors NPE
// services/youtube/extractors/YoutubeChannelExtractor.java +
// YoutubeChannelHelper.java.
//
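// Handle / custom URL / legacy user resolution: NPE issues
// `/youtubei/v1/navigation/resolve_url` against the `youtube.com/@handle`
// URL, walks `endpoint.browseEndpoint.browseId` to get the UC... id, and
// retries the browse call, following up to 3 redirect hops.
// NOTE: `resolve_handle_to_channel_id` in this file issues a single
// resolve_url call and does not follow redirect hops; a response without
// `endpoint.browseEndpoint.browseId` is reported as a missing field.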
//
// Tab parsing (videos/shorts/live/playlists) is in audit Track D §5 —
// `tabs[].tabRenderer.endpoint.browseEndpoint.params` is the magic
// base64 needed to land on each tab.
use serde_json::Value;
use crate::downloader::request::Request;
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
use crate::image::{Image, ImageSet, ResolutionLevel};
use crate::newpipe::NewPipe;
use crate::stream::StreamInfoItem;
use crate::youtube::client_request::build_desktop_envelope;
use crate::youtube::constants::*;
use crate::youtube::linkhandler::channel::ChannelIdentifier;
use crate::youtube::parsing::{web_client_version, youtube_post_headers};
/// Channel metadata plus the first page of uploads, assembled by
/// `fetch_channel_browse` from two `/youtubei/v1/browse` responses (header
/// fields from the first, video items from the second).
#[derive(Clone, Debug, Default)]
pub struct ChannelInfo {
/// Id the browse request was issued with (copied from the `channel_id` argument).
pub channel_id: String,
/// Always `https://www.youtube.com/channel/{channel_id}`.
pub url: String,
/// Header title (`title` on c4TabbedHeaderRenderer, `pageTitle` on
/// pageHeaderRenderer). Empty when neither header flavor is present.
pub name: String,
/// `metadata.channelMetadataRenderer.description`; empty when absent.
pub description: String,
/// Avatar images from the header, backfilled from
/// `channelMetadataRenderer.avatar` when the header yielded none.
pub avatars: ImageSet,
/// Banner images. Only the c4TabbedHeaderRenderer branch fills this in.
pub banners: ImageSet,
/// Parsed from the c4 header's `subscriberCountText.simpleText`. `-1` when
/// the text cannot be parsed; stays at the default `0` when the text is absent.
pub subscriber_count: i64,
/// True when any header badge style starts with `BADGE_STYLE_TYPE_VERIFIED`.
pub verified: bool,
/// Items parsed from the Videos tab. Left empty when that second browse fails.
pub recent_videos: Vec<StreamInfoItem>,
/// Continuation token found in the Videos tab grid, if any.
pub videos_continuation: Option<String>,
}
/// Fetches channel info for any supported identifier flavor.
///
/// A direct channel id is browsed as-is. Handles, custom URLs and legacy
/// user names are first turned into the URL path fragments `@{handle}`,
/// `c/{name}` and `user/{name}` respectively and resolved to a channel id
/// through `resolve_handle_to_channel_id`.
///
/// # Errors
/// Propagates any error from the resolve step or from the channel browse.
pub fn channel_info(identifier: ChannelIdentifier) -> Result<ChannelInfo, ExtractionError> {
    let fragment = match identifier {
        // Already a browse id — nothing to resolve.
        ChannelIdentifier::DirectId(channel_id) => return fetch_channel_browse(&channel_id),
        ChannelIdentifier::Handle(handle) => format!("@{handle}"),
        ChannelIdentifier::Custom(custom) => format!("c/{custom}"),
        ChannelIdentifier::LegacyUser(user) => format!("user/{user}"),
    };
    let channel_id = resolve_handle_to_channel_id(&fragment)?;
    fetch_channel_browse(&channel_id)
}
/// Resolves a `youtube.com/<url_fragment>` URL to its channel browse id.
///
/// POSTs the desktop client envelope, extended with a `url` field, to
/// `navigation/resolve_url` and reads `endpoint.browseEndpoint.browseId`
/// from the JSON response.
///
/// # Errors
/// * `ExtractionError::DownloaderMissing` when no downloader is registered.
/// * `ParsingError::Invalid` when the request body cannot be serialized.
/// * `NetworkError::Transport` for any non-200 response code.
/// * `ParsingError::JsonShape` when the response is not valid JSON.
/// * `ParsingError::MissingField` when the browse id is absent.
/// * Whatever error the downloader itself reports.
pub fn resolve_handle_to_channel_id(url_fragment: &str) -> Result<String, ExtractionError> {
    let Some(downloader) = NewPipe::downloader() else {
        return Err(ExtractionError::DownloaderMissing);
    };
    let localization = NewPipe::preferred_localization();
    let content_country = NewPipe::preferred_content_country();
    let target_url = format!("https://www.youtube.com/{url_fragment}");

    let client_version = web_client_version();
    let mut envelope = build_desktop_envelope(&localization, &content_country, &client_version);
    if let Some(fields) = envelope.as_object_mut() {
        fields.insert("url".into(), Value::String(target_url));
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}navigation/resolve_url{DISABLE_PRETTY_PRINT_PARAM}");
    let payload = serde_json::to_vec(&envelope).map_err(|e| {
        ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {e}")))
    })?;

    let request = youtube_post_headers()
        .into_iter()
        .fold(Request::post(&endpoint, payload), |builder, (key, value)| {
            builder.add_header(&key, &value)
        })
        .build();
    let response = downloader.execute(request)?;

    let status = response.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "resolve_url HTTP {}",
            status
        ))));
    }

    let json: Value = serde_json::from_str(response.response_body())
        .map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))?;

    match json
        .pointer("/endpoint/browseEndpoint/browseId")
        .and_then(Value::as_str)
    {
        Some(browse_id) => Ok(browse_id.to_owned()),
        None => Err(ExtractionError::Parsing(ParsingError::MissingField(
            "endpoint.browseEndpoint.browseId".into(),
        ))),
    }
}
/// Magic params for the channel "Videos" tab — opaque base64. Same constant
/// NPE uses (audit Track A §2.4). Sending it with the channel browseId
/// switches YT's response from the Home tab to the Videos tab.
///
/// `fetch_channel_browse` passes it verbatim as the `params` field of the
/// second browse request. Decoded, the bytes contain the ASCII string
/// `videos`; the rest of the payload is treated as opaque here.
const CHANNEL_VIDEOS_TAB_PARAMS: &str = "EgZ2aWRlb3PyBgQKAjoA";
/// Builds a `ChannelInfo` for `channel_id` from two browse requests.
///
/// The first request (no `params`) supplies the header and metadata and
/// must succeed. The second targets the Videos tab and is best-effort: if
/// it fails, the info is returned with `recent_videos` empty and no
/// continuation token.
///
/// # Errors
/// Only errors from the first browse request are propagated.
pub fn fetch_channel_browse(channel_id: &str) -> Result<ChannelInfo, ExtractionError> {
    // Home tab: channel header + metadata.
    let home = fetch_browse(channel_id, None)?;
    let mut info = parse_channel_browse(channel_id, &home);

    // Videos tab: a failure here is deliberately swallowed so the header
    // data from the first browse is still returned.
    let Ok(videos) = fetch_browse(channel_id, Some(CHANNEL_VIDEOS_TAB_PARAMS)) else {
        return Ok(info);
    };
    info.recent_videos = parse_videos_tab(&videos);
    if let token @ Some(_) = parse_videos_continuation(&videos) {
        info.videos_continuation = token;
    }
    Ok(info)
}
/// POSTs a `/youtubei/v1/browse` request for `channel_id` and returns the
/// parsed JSON response.
///
/// The desktop client envelope is extended with `browseId` and, when
/// `params` is given, a `params` field selecting a specific tab.
///
/// # Errors
/// * `ExtractionError::DownloaderMissing` when no downloader is registered.
/// * `ParsingError::Invalid` when the request body cannot be serialized.
/// * `NetworkError::Transport` for any non-200 response code.
/// * `ParsingError::JsonShape` when the response is not valid JSON.
/// * Whatever error the downloader itself reports.
fn fetch_browse(channel_id: &str, params: Option<&str>) -> Result<Value, ExtractionError> {
    let Some(downloader) = NewPipe::downloader() else {
        return Err(ExtractionError::DownloaderMissing);
    };
    let localization = NewPipe::preferred_localization();
    let content_country = NewPipe::preferred_content_country();

    let client_version = web_client_version();
    let mut envelope = build_desktop_envelope(&localization, &content_country, &client_version);
    if let Some(fields) = envelope.as_object_mut() {
        fields.insert("browseId".into(), Value::String(channel_id.to_owned()));
        if let Some(tab_params) = params {
            fields.insert("params".into(), Value::String(tab_params.to_owned()));
        }
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}browse{DISABLE_PRETTY_PRINT_PARAM}");
    let payload = serde_json::to_vec(&envelope).map_err(|e| {
        ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {e}")))
    })?;

    let request = youtube_post_headers()
        .into_iter()
        .fold(Request::post(&endpoint, payload), |builder, (key, value)| {
            builder.add_header(&key, &value)
        })
        .build();
    let response = downloader.execute(request)?;

    let status = response.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "browse HTTP {}",
            status
        ))));
    }

    serde_json::from_str(response.response_body())
        .map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))
}
pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
let mut info = ChannelInfo {
channel_id: channel_id.to_string(),
url: format!("https://www.youtube.com/channel/{channel_id}"),
..ChannelInfo::default()
};
// C4_TABBED header flavor is the most common.
if let Some(header) = body
.get("header")
.and_then(|h| h.get("c4TabbedHeaderRenderer"))
{
if let Some(s) = header.get("title").and_then(|t| t.as_str()) {
info.name = s.to_string();
}
info.avatars = parse_image_set(header.get("avatar"));
info.banners = parse_image_set(header.get("banner"));
if let Some(text) = header
.get("subscriberCountText")
.and_then(|s| s.get("simpleText"))
.and_then(|s| s.as_str())
{
info.subscriber_count = parse_subscriber_count(text);
}
if let Some(badges) = header.get("badges").and_then(|b| b.as_array()) {
info.verified = badges.iter().any(|b| {
b.get("metadataBadgeRenderer")
.and_then(|m| m.get("style"))
.and_then(|s| s.as_str())
.map(|s| s.starts_with("BADGE_STYLE_TYPE_VERIFIED"))
.unwrap_or(false)
});
}
}
// Alternative pageHeaderRenderer (newer flavor — 2025+).
// Avatar nests deep under decoratedAvatarViewModel.avatar.avatarViewModel
// .image.sources[]; banner under contentBanner...image.sources[].
// YT keeps adding wrappers in this branch — walk all the known
// intermediates and parse the first sources[] we find.
else if let Some(header) = body
.get("header")
.and_then(|h| h.get("pageHeaderRenderer"))
{
if let Some(s) = header.get("pageTitle").and_then(|t| t.as_str()) {
info.name = s.to_string();
}
if info.avatars.is_empty() {
info.avatars = parse_page_header_avatar(header);
}
}
// microformat / description / avatar fallback. metadata.channel
// MetadataRenderer.avatar.thumbnails[] is the most reliable avatar
// path — present whether the header is c4TabbedHeaderRenderer or
// pageHeaderRenderer. Use it as a last-resort backfill so newer
// channel layouts that don't expose the avatar in the header still
// give us SOMETHING.
let metadata = body
.get("metadata")
.and_then(|m| m.get("channelMetadataRenderer"));
if let Some(m) = metadata {
if let Some(desc) = m.get("description").and_then(|d| d.as_str()) {
info.description = desc.to_string();
}
if info.avatars.is_empty() {
info.avatars = parse_image_set(m.get("avatar"));
}
}
// Note: recent_videos are populated by a separate second browse to
// the Videos tab — see fetch_channel_browse. The first browse's Home
// tab does NOT contain a clean video grid in current YT.
info
}
/// Walk the Videos-tab browse response into a list of StreamInfoItems.
/// Handles BOTH old-style `videoRenderer` items and new-style
/// `lockupViewModel` items (YT migrated channel-videos UI to
/// lockupViewModel around 2024).
fn parse_videos_tab(body: &Value) -> Vec<StreamInfoItem> {
let mut out = Vec::new();
let tabs = body
.get("contents")
.and_then(|c| c.get("twoColumnBrowseResultsRenderer"))
.and_then(|c| c.get("tabs"))
.and_then(|t| t.as_array());
let Some(tabs) = tabs else { return out };
for tab in tabs {
let Some(tr) = tab.get("tabRenderer") else { continue };
if !tr
.get("selected")
.and_then(|s| s.as_bool())
.unwrap_or(false)
{
continue;
}
let Some(items) = tr
.get("content")
.and_then(|c| c.get("richGridRenderer"))
.and_then(|g| g.get("contents"))
.and_then(|c| c.as_array())
else {
continue;
};
for cell in items {
// richItemRenderer carries either videoRenderer (legacy) or
// lockupViewModel (current 2026 YT).
let Some(content) = cell
.get("richItemRenderer")
.and_then(|r| r.get("content"))
else {
continue;
};
if let Some(vr) = content.get("videoRenderer") {
if let Some(item) = crate::youtube::search_extractor::renderer_helpers::video_renderer_to_item(vr) {
out.push(item);
}
} else if let Some(lvm) = content.get("lockupViewModel") {
if let Some(item) = parse_lockup_video(lvm) {
out.push(item);
}
}
}
}
out
}
fn parse_videos_continuation(body: &Value) -> Option<String> {
let tabs = body
.get("contents")
.and_then(|c| c.get("twoColumnBrowseResultsRenderer"))
.and_then(|c| c.get("tabs"))
.and_then(|t| t.as_array())?;
for tab in tabs {
let Some(tr) = tab.get("tabRenderer") else { continue };
if !tr.get("selected").and_then(|s| s.as_bool()).unwrap_or(false) {
continue;
}
let items = tr
.get("content")
.and_then(|c| c.get("richGridRenderer"))
.and_then(|g| g.get("contents"))
.and_then(|c| c.as_array())?;
for cell in items {
if let Some(token) = cell
.get("continuationItemRenderer")
.and_then(|s| s.get("continuationEndpoint"))
.and_then(|c| c.get("continuationCommand"))
.and_then(|c| c.get("token"))
.and_then(|t| t.as_str())
{
return Some(token.to_string());
}
}
}
None
}
fn parse_lockup_video(lvm: &Value) -> Option<StreamInfoItem> {
// lockupViewModel only carries videos when contentType says so. Skip
// playlists, shorts collections, channel-redirects, etc.
let content_type = lvm.get("contentType").and_then(|v| v.as_str()).unwrap_or("");
if content_type != "LOCKUP_CONTENT_TYPE_VIDEO" {
return None;
}
let video_id = lvm.get("contentId").and_then(|v| v.as_str())?.to_string();
if video_id.len() != 11 {
return None;
}
let lockup_md = lvm
.get("metadata")
.and_then(|m| m.get("lockupMetadataViewModel"))?;
let title = lockup_md
.get("title")
.and_then(|t| t.get("content"))
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
// metadataRows[0] = ["1.1m views", "2 years ago"]. metadataRows[1] is
// sometimes uploader name (when shown on home/search lockups) but on
// a channel's own Videos tab it's not present (we know the channel).
let mut view_count = -1i64;
let mut upload_relative = String::new();
let mut uploader_name = String::new();
if let Some(cmv) = lockup_md
.get("metadata")
.and_then(|m| m.get("contentMetadataViewModel"))
{
if let Some(rows) = cmv.get("metadataRows").and_then(|r| r.as_array()) {
for row in rows {
let Some(parts) = row.get("metadataParts").and_then(|p| p.as_array()) else {
continue;
};
for part in parts {
let Some(txt) = part
.get("text")
.and_then(|t| t.get("content"))
.and_then(|v| v.as_str())
else {
continue;
};
let lc = txt.to_ascii_lowercase();
if lc.contains("view") && view_count < 0 {
view_count = parse_lockup_view_count(txt);
} else if (lc.contains("ago")
|| lc.contains("hour")
|| lc.contains("minute")
|| lc.contains("yesterday")
|| lc.contains("days")
|| lc.contains("weeks")
|| lc.contains("months")
|| lc.contains("years"))
&& upload_relative.is_empty()
{
upload_relative = txt.to_string();
} else if uploader_name.is_empty()
&& !lc.contains("view")
&& !lc.contains("ago")
{
uploader_name = txt.to_string();
}
}
}
}
}
// duration text lives in a thumbnail overlay badge ("3:14:08")
let mut duration_seconds = 0i64;
if let Some(overlays) = lvm
.get("contentImage")
.and_then(|c| c.get("thumbnailViewModel"))
.and_then(|t| t.get("overlays"))
.and_then(|o| o.as_array())
{
for ov in overlays {
if let Some(badges) = ov
.get("thumbnailBottomOverlayViewModel")
.and_then(|b| b.get("badges"))
.and_then(|b| b.as_array())
{
for b in badges {
if let Some(txt) = b
.get("thumbnailBadgeViewModel")
.and_then(|m| m.get("text"))
.and_then(|v| v.as_str())
{
if txt.contains(':') && duration_seconds == 0 {
duration_seconds = parse_duration_clock(txt);
}
}
}
}
}
}
// thumbnails — sources array, pre-sorted ascending by size
let mut thumbnails = Vec::new();
if let Some(sources) = lvm
.get("contentImage")
.and_then(|c| c.get("thumbnailViewModel"))
.and_then(|t| t.get("image"))
.and_then(|i| i.get("sources"))
.and_then(|s| s.as_array())
{
for src in sources {
if let Some(url) = src.get("url").and_then(|v| v.as_str()) {
let h = src.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
let w = src.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
thumbnails.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
}
}
}
Some(StreamInfoItem {
service_id: 0,
url: format!("https://www.youtube.com/watch?v={video_id}"),
name: title,
thumbnails,
uploader_name,
uploader_url: String::new(),
uploader_id: String::new(),
uploader_verified: false,
duration_seconds,
view_count,
upload_date_relative: upload_relative,
stream_type: Some(crate::stream::StreamType::VideoStream),
short_description: String::new(),
})
}
/// Parses an abbreviated view-count label into an absolute number.
///
/// Accepts forms such as "1.1m views", "23K views", "5.4b views",
/// "1,234 views", "1 view" and "No views". Matching is ASCII
/// case-insensitive and thousands separators (`,`) are ignored.
///
/// Returns `-1` when the text cannot be interpreted.
fn parse_lockup_view_count(text: &str) -> i64 {
    let lowered = text.to_ascii_lowercase().replace(',', "");
    let without_unit = lowered.replace(" views", "").replace(" view", "");
    let cleaned = without_unit.trim();

    // "No views" is a real count of zero, not an unparseable label.
    if cleaned == "no" {
        return 0;
    }

    let (num, mult) = if let Some(n) = cleaned.strip_suffix('k') {
        (n.trim(), 1_000.0)
    } else if let Some(n) = cleaned.strip_suffix('m') {
        (n.trim(), 1_000_000.0)
    } else if let Some(n) = cleaned.strip_suffix('b') {
        (n.trim(), 1_000_000_000.0)
    } else {
        (cleaned, 1.0)
    };

    // Round instead of truncating: decimal fractions are inexact in f64
    // (8.2 * 1e6 evaluates to 8199999.999999999), so a plain `as i64`
    // would land one below the intended value.
    num.parse::<f64>()
        .map(|n| (n * mult).round() as i64)
        .unwrap_or(-1)
}
/// Converts a clock-style duration ("3:14:08", "0:45") into seconds.
///
/// The text is split on `:` and folded base-60 from the left, so any
/// number of fields is accepted. A field that is not a valid integer
/// counts as 0.
fn parse_duration_clock(text: &str) -> i64 {
    text.split(':')
        .map(|field| field.trim().parse::<i64>().unwrap_or(0))
        .fold(0i64, |seconds, field| seconds * 60 + field)
}
fn parse_image_set(value: Option<&Value>) -> ImageSet {
let mut out = Vec::new();
if let Some(arr) = value
.and_then(|v| v.get("thumbnails"))
.and_then(|t| t.as_array())
{
for t in arr {
if let Some(url) = t.get("url").and_then(|v| v.as_str()) {
let h = t.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
let w = t.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
}
}
}
out
}
/// Avatar extraction for the newer pageHeaderRenderer flavor.
///
/// Walks `header.content.pageHeaderViewModel.image` and finds the first
/// `sources[]` array it can — that lives under either
/// `decoratedAvatarViewModel.avatar.avatarViewModel.image.sources` or,
/// on some channels, the slightly shallower
/// `avatarViewModel.image.sources`. Returns ImageSet ordered by source
/// height ascending — matches what parse_image_set produces for the
/// legacy `thumbnails[]` path, so .last() still gives the largest one.
fn parse_page_header_avatar(header: &Value) -> ImageSet {
let content = header
.get("content")
.and_then(|c| c.get("pageHeaderViewModel"));
let Some(content) = content else { return Vec::new() };
let image = content.get("image");
let Some(image) = image else { return Vec::new() };
// Try a couple of nestings — YT migrates the exact path occasionally
// and we want to keep parsing through future shuffles.
let candidates = [
image
.get("decoratedAvatarViewModel")
.and_then(|d| d.get("avatar"))
.and_then(|a| a.get("avatarViewModel"))
.and_then(|a| a.get("image"))
.and_then(|i| i.get("sources")),
image
.get("avatarViewModel")
.and_then(|a| a.get("image"))
.and_then(|i| i.get("sources")),
image.get("sources"),
];
for src in candidates.into_iter().flatten() {
if let Some(arr) = src.as_array() {
let mut out = Vec::with_capacity(arr.len());
for s in arr {
let Some(url) = s.get("url").and_then(|v| v.as_str()) else { continue };
let w = s.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
let h = s.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
}
// Match parse_image_set ordering — caller uses .last() for
// the largest size.
out.sort_by_key(|i| i.height());
if !out.is_empty() {
return out;
}
}
}
Vec::new()
}
/// Parses a subscriber label into an absolute number.
///
/// Accepts forms such as "12.5M subscribers", "1.2K subscribers",
/// "350 subscribers" and "1 subscriber". The K / M / B magnitude suffix is
/// matched in either case and thousands separators (`,`) are ignored.
///
/// Returns `-1` when the text cannot be interpreted.
fn parse_subscriber_count(text: &str) -> i64 {
    let without_unit = text.replace("subscribers", "").replace("subscriber", "");
    let stripped = without_unit.trim();

    // Try the upper-case suffix first, then the lower-case spelling.
    let strip_magnitude = |upper: char, lower: char| {
        stripped
            .strip_suffix(upper)
            .or_else(|| stripped.strip_suffix(lower))
    };
    let (num, mult) = if let Some(n) = strip_magnitude('K', 'k') {
        (n.trim(), 1_000.0)
    } else if let Some(n) = strip_magnitude('M', 'm') {
        (n.trim(), 1_000_000.0)
    } else if let Some(n) = strip_magnitude('B', 'b') {
        (n.trim(), 1_000_000_000.0)
    } else {
        (stripped, 1.0)
    };

    // Round instead of truncating: decimal fractions are inexact in f64
    // (8.2 * 1e6 evaluates to 8199999.999999999), so a plain `as i64`
    // would land one below the intended value.
    num.replace(',', "")
        .parse::<f64>()
        .map(|n| (n * mult).round() as i64)
        .unwrap_or(-1)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Plain, K/M/B-suffixed and singular subscriber labels.
    #[test]
    fn subscriber_count_parsing() {
        let cases: [(&str, i64); 5] = [
            ("350 subscribers", 350),
            ("1.2K subscribers", 1_200),
            ("12.5M subscribers", 12_500_000),
            ("2B subscribers", 2_000_000_000),
            ("1 subscriber", 1),
        ];
        for (label, expected) in cases.iter().copied() {
            assert_eq!(parse_subscriber_count(label), expected);
        }
    }

    /// The classic header supplies name, subscriber count and the verified
    /// badge; the description comes from channelMetadataRenderer.
    #[test]
    fn parses_c4_tabbed_header() {
        let channel_id = "UC_aEa8K-EOJ3D6gOs7HcyNg";
        let response = json!({
            "header": {
                "c4TabbedHeaderRenderer": {
                    "title": "NoCopyrightSounds",
                    "subscriberCountText": { "simpleText": "42.5M subscribers" },
                    "badges": [
                        { "metadataBadgeRenderer": { "style": "BADGE_STYLE_TYPE_VERIFIED_ARTIST" } }
                    ]
                }
            },
            "metadata": {
                "channelMetadataRenderer": { "description": "Royalty-free music" }
            }
        });

        let parsed = parse_channel_browse(channel_id, &response);

        assert_eq!(parsed.name, "NoCopyrightSounds");
        assert_eq!(parsed.description, "Royalty-free music");
        assert_eq!(parsed.subscriber_count, 42_500_000);
        assert!(parsed.verified);
        assert_eq!(parsed.channel_id, "UC_aEa8K-EOJ3D6gOs7HcyNg");
    }

    /// Without a c4 header the name falls back to pageHeaderRenderer.pageTitle.
    #[test]
    fn parses_page_header_renderer_fallback() {
        let response = json!({
            "header": { "pageHeaderRenderer": { "pageTitle": "@SomeChannel" } }
        });

        let parsed = parse_channel_browse("UCxxx", &response);

        assert_eq!(parsed.name, "@SomeChannel");
    }
}