channel: extract avatar from pageHeaderRenderer + metadata fallback

Channels on the newer pageHeaderRenderer layout (most channels with a
2024+ refreshed header — WTYP, etc.) were getting empty avatars and
banners because parse_channel_browse only extracted them from the
older c4TabbedHeaderRenderer branch.

Two fixes layered:

1. parse_page_header_avatar() — walks the deep ViewModel nest:
     header.content.pageHeaderViewModel.image
       .decoratedAvatarViewModel.avatar.avatarViewModel.image.sources[]
   Falls back to a couple of shallower nestings YT has used on this
   path historically. Returns ImageSet sorted by height ascending so
   .last() still picks the largest source.

2. metadata.channelMetadataRenderer.avatar.thumbnails[] backfill.
   It is populated whether the header is c4Tabbed or pageHeader, and is
   the most reliable single avatar source. Used only when both header
   branches came back empty so we don't override a higher-quality
   header avatar.

Description-from-metadata extraction folded into the same metadata
walk to avoid walking the JSON tree twice.
This commit is contained in:
Kayos 2026-05-25 19:47:46 +00:00
parent e6fbbb79b4
commit 7c7151186e

View file

@ -174,7 +174,11 @@ pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
});
}
}
// Alternative pageHeaderRenderer (newer flavor — 2025+)
// Alternative pageHeaderRenderer (newer flavor — 2025+).
// Avatar nests deep under decoratedAvatarViewModel.avatar.avatarViewModel
// .image.sources[]; banner under contentBanner...image.sources[].
// YT keeps adding wrappers in this branch — walk all the known
// intermediates and parse the first sources[] we find.
else if let Some(header) = body
.get("header")
.and_then(|h| h.get("pageHeaderRenderer"))
@ -182,16 +186,27 @@ pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
if let Some(s) = header.get("pageTitle").and_then(|t| t.as_str()) {
info.name = s.to_string();
}
if info.avatars.is_empty() {
info.avatars = parse_page_header_avatar(header);
}
}
// microformat / description
if let Some(desc) = body
// microformat / description / avatar fallback. metadata.channel
// MetadataRenderer.avatar.thumbnails[] is the most reliable avatar
// path — present whether the header is c4TabbedHeaderRenderer or
// pageHeaderRenderer. Use it as a last-resort backfill so newer
// channel layouts that don't expose the avatar in the header still
// give us SOMETHING.
let metadata = body
.get("metadata")
.and_then(|m| m.get("channelMetadataRenderer"))
.and_then(|m| m.get("description"))
.and_then(|d| d.as_str())
{
info.description = desc.to_string();
.and_then(|m| m.get("channelMetadataRenderer"));
if let Some(m) = metadata {
if let Some(desc) = m.get("description").and_then(|d| d.as_str()) {
info.description = desc.to_string();
}
if info.avatars.is_empty() {
info.avatars = parse_image_set(m.get("avatar"));
}
}
// Note: recent_videos are populated by a separate second browse to
@ -461,6 +476,59 @@ fn parse_image_set(value: Option<&Value>) -> ImageSet {
out
}
/// Avatar extraction for the newer pageHeaderRenderer flavor.
///
/// Walks `header.content.pageHeaderViewModel.image` and finds the first
/// `sources[]` array it can — that lives under either
/// `decoratedAvatarViewModel.avatar.avatarViewModel.image.sources` or,
/// on some channels, the slightly shallower
/// `avatarViewModel.image.sources`. Returns ImageSet ordered by source
/// height ascending — matches what parse_image_set produces for the
/// legacy `thumbnails[]` path, so .last() still gives the largest one.
fn parse_page_header_avatar(header: &Value) -> ImageSet {
let content = header
.get("content")
.and_then(|c| c.get("pageHeaderViewModel"));
let Some(content) = content else { return Vec::new() };
let image = content.get("image");
let Some(image) = image else { return Vec::new() };
// Try a couple of nestings — YT migrates the exact path occasionally
// and we want to keep parsing through future shuffles.
let candidates = [
image
.get("decoratedAvatarViewModel")
.and_then(|d| d.get("avatar"))
.and_then(|a| a.get("avatarViewModel"))
.and_then(|a| a.get("image"))
.and_then(|i| i.get("sources")),
image
.get("avatarViewModel")
.and_then(|a| a.get("image"))
.and_then(|i| i.get("sources")),
image.get("sources"),
];
for src in candidates.into_iter().flatten() {
if let Some(arr) = src.as_array() {
let mut out = Vec::with_capacity(arr.len());
for s in arr {
let Some(url) = s.get("url").and_then(|v| v.as_str()) else { continue };
let w = s.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
let h = s.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
}
// Match parse_image_set ordering — caller uses .last() for
// the largest size.
out.sort_by_key(|i| i.height());
if !out.is_empty() {
return out;
}
}
}
Vec::new()
}
fn parse_subscriber_count(text: &str) -> i64 {
// "12.5M subscribers" / "1.2K subscribers" / "350 subscribers"
let stripped = text