From 7c7151186ea7830ce81df8745e894651d50f3a81 Mon Sep 17 00:00:00 2001 From: Kayos Date: Mon, 25 May 2026 19:47:46 +0000 Subject: [PATCH] channel: extract avatar from pageHeaderRenderer + metadata fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Channels on the newer pageHeaderRenderer layout (most channels with a 2024+ refreshed header — WTYP, etc.) were getting empty avatars and banners since parse_channel_browse only extracted those from the older c4TabbedHeaderRenderer branch. This change restores the avatar only; banners on this layout are not addressed here. Two fixes layered: 1. parse_page_header_avatar() — walks the deep ViewModel nest: header.content.pageHeaderViewModel.image .decoratedAvatarViewModel.avatar.avatarViewModel.image.sources[] Falls back to a couple of shallower nestings YT has used on this path historically. Returns ImageSet sorted by height ascending so .last() still picks the largest source. 2. metadata.channelMetadataRenderer.avatar.thumbnails[] backfill. Present whether the header is c4Tabbed or pageHeader, and the most reliable single avatar source. Used only when both header branches came back empty so we don't override a higher-quality header avatar. Description-from-metadata extraction folded into the same metadata walk to avoid walking the JSON tree twice. --- src/youtube/channel.rs | 84 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 8 deletions(-) diff --git a/src/youtube/channel.rs b/src/youtube/channel.rs index f4e6e1e..bd1fe03 100644 --- a/src/youtube/channel.rs +++ b/src/youtube/channel.rs @@ -174,7 +174,11 @@ pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo { }); } } - // Alternative pageHeaderRenderer (newer flavor — 2025+) + // Alternative pageHeaderRenderer (newer flavor — 2025+). + // Avatar nests deep under decoratedAvatarViewModel.avatar.avatarViewModel + // .image.sources[]; banner under contentBanner...image.sources[]. 
+ // YT keeps adding wrappers in this branch — walk all the known + // intermediates and parse the first sources[] we find. else if let Some(header) = body .get("header") .and_then(|h| h.get("pageHeaderRenderer")) @@ -182,16 +186,27 @@ pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo { if let Some(s) = header.get("pageTitle").and_then(|t| t.as_str()) { info.name = s.to_string(); } + if info.avatars.is_empty() { + info.avatars = parse_page_header_avatar(header); + } } - // microformat / description - if let Some(desc) = body + // microformat / description / avatar fallback. + // metadata.channelMetadataRenderer.avatar.thumbnails[] is the most + // reliable avatar path — present whether the header is + // c4TabbedHeaderRenderer or pageHeaderRenderer. Use it as a + // last-resort backfill so newer channel layouts that don't expose + // the avatar in the header still give us SOMETHING. + let metadata = body .get("metadata") - .and_then(|m| m.get("channelMetadataRenderer")) - .and_then(|m| m.get("description")) - .and_then(|d| d.as_str()) - { - info.description = desc.to_string(); + .and_then(|m| m.get("channelMetadataRenderer")); + if let Some(m) = metadata { + if let Some(desc) = m.get("description").and_then(|d| d.as_str()) { + info.description = desc.to_string(); + } + if info.avatars.is_empty() { + info.avatars = parse_image_set(m.get("avatar")); + } } // Note: recent_videos are populated by a separate second browse to @@ -461,6 +476,59 @@ fn parse_image_set(value: Option<&Value>) -> ImageSet { out } +/// Avatar extraction for the newer pageHeaderRenderer flavor. +/// +/// Walks `header.content.pageHeaderViewModel.image` and returns the first +/// non-empty parse of a `sources[]` array (entries lacking a `url` are +/// skipped), or an empty set if none is found. Paths tried, in order: +/// `decoratedAvatarViewModel.avatar.avatarViewModel.image.sources`, +/// `avatarViewModel.image.sources`, then `sources` directly. 
Returns ImageSet ordered by source +/// height ascending — NOTE(review): assumed to match parse_image_set's order for the +/// legacy `thumbnails[]` path (confirm), so .last() still gives the largest one. +fn parse_page_header_avatar(header: &Value) -> ImageSet { + let content = header + .get("content") + .and_then(|c| c.get("pageHeaderViewModel")); + let Some(content) = content else { return Vec::new() }; + let image = content.get("image"); + let Some(image) = image else { return Vec::new() }; + + // Try a couple of nestings — YT migrates the exact path occasionally + // and we want to keep parsing through future shuffles. + let candidates = [ + image + .get("decoratedAvatarViewModel") + .and_then(|d| d.get("avatar")) + .and_then(|a| a.get("avatarViewModel")) + .and_then(|a| a.get("image")) + .and_then(|i| i.get("sources")), + image + .get("avatarViewModel") + .and_then(|a| a.get("image")) + .and_then(|i| i.get("sources")), + image.get("sources"), + ]; + + for src in candidates.into_iter().flatten() { + if let Some(arr) = src.as_array() { + let mut out = Vec::with_capacity(arr.len()); + for s in arr { + let Some(url) = s.get("url").and_then(|v| v.as_str()) else { continue }; + let w = s.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32; + let h = s.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32; + out.push(Image::new(url, h, w, ResolutionLevel::from_height(h))); + } + // Match parse_image_set ordering — caller uses .last() for + // the largest size. + out.sort_by_key(|i| i.height()); + if !out.is_empty() { + return out; + } + } + } + Vec::new() +} + fn parse_subscriber_count(text: &str) -> i64 { // "12.5M subscribers" / "1.2K subscribers" / "350 subscribers" let stripped = text