From 7c7151186ea7830ce81df8745e894651d50f3a81 Mon Sep 17 00:00:00 2001
From: Kayos <cobb@sulkta.com>
Date: Mon, 25 May 2026 19:47:46 +0000
Subject: [PATCH] channel: extract avatar from pageHeaderRenderer + metadata
 fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Channels on the newer pageHeaderRenderer layout (most channels with a
2024+ refreshed header — WTYP, etc.) were getting empty avatars and
banners because parse_channel_browse only extracted those from the
older c4TabbedHeaderRenderer branch. This patch restores the avatar;
banner extraction for pageHeaderRenderer is not changed here.

Two fixes layered:

1. parse_page_header_avatar() — walks the deep ViewModel nest:
     header.content.pageHeaderViewModel.image
       .decoratedAvatarViewModel.avatar.avatarViewModel.image.sources[]
   Falls back to a couple of shallower nestings YT has used on this
   path historically. Returns ImageSet sorted by height ascending so
   .last() still picks the largest source.

2. metadata.channelMetadataRenderer.avatar.thumbnails[] backfill.
   Present whether the header is c4Tabbed or pageHeader, and the most
   reliable single avatar source. Used only when both header branches
   came back empty so we don't override a higher-quality header avatar.

Description-from-metadata extraction is folded into the same metadata
walk to avoid walking the JSON tree twice.
---
 src/youtube/channel.rs | 84 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 76 insertions(+), 8 deletions(-)

diff --git a/src/youtube/channel.rs b/src/youtube/channel.rs
index f4e6e1e..bd1fe03 100644
--- a/src/youtube/channel.rs
+++ b/src/youtube/channel.rs
@@ -174,7 +174,11 @@ pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
             });
         }
     }
-    // Alternative pageHeaderRenderer (newer flavor — 2025+)
+    // Alternative pageHeaderRenderer (newer flavor — 2025+).
+    // Avatar nests deep under decoratedAvatarViewModel.avatar.avatarViewModel
+    // .image.sources[]; banner under contentBanner...image.sources[].
+    // YT keeps adding wrappers in this branch — walk all the known
+    // intermediates and parse the first sources[] we find.
     else if let Some(header) = body
         .get("header")
         .and_then(|h| h.get("pageHeaderRenderer"))
@@ -182,16 +186,27 @@ pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
         if let Some(s) = header.get("pageTitle").and_then(|t| t.as_str()) {
             info.name = s.to_string();
         }
+        if info.avatars.is_empty() {
+            info.avatars = parse_page_header_avatar(header);
+        }
     }
 
-    // microformat / description
-    if let Some(desc) = body
+    // microformat / description / avatar fallback.
+    // metadata.channelMetadataRenderer.avatar.thumbnails[] is the most
+    // reliable avatar path — present whether the header is
+    // c4TabbedHeaderRenderer or pageHeaderRenderer. Use it as a
+    // last-resort backfill so newer channel layouts that don't expose
+    // the avatar in the header still give us SOMETHING.
+    let metadata = body
         .get("metadata")
-        .and_then(|m| m.get("channelMetadataRenderer"))
-        .and_then(|m| m.get("description"))
-        .and_then(|d| d.as_str())
-    {
-        info.description = desc.to_string();
+        .and_then(|m| m.get("channelMetadataRenderer"));
+    if let Some(m) = metadata {
+        if let Some(desc) = m.get("description").and_then(|d| d.as_str()) {
+            info.description = desc.to_string();
+        }
+        if info.avatars.is_empty() {
+            info.avatars = parse_image_set(m.get("avatar"));
+        }
     }
 
     // Note: recent_videos are populated by a separate second browse to
@@ -461,6 +476,59 @@ fn parse_image_set(value: Option<&Value>) -> ImageSet {
     out
 }
 
+/// Avatar extraction for the newer pageHeaderRenderer flavor.
+///
+/// Starts at `header.content.pageHeaderViewModel.image` and tries, in
+/// order, `decoratedAvatarViewModel.avatar.avatarViewModel.image.sources`,
+/// `avatarViewModel.image.sources`, then `sources` directly. The first
+/// candidate array yielding at least one entry with a string `url` wins;
+/// an absent or non-integer width/height is recorded as -1. The result is
+/// sorted by height ascending (intended to mirror parse_image_set) so
+/// `.last()` is the tallest source. Returns an empty set if none match.
+fn parse_page_header_avatar(header: &Value) -> ImageSet {
+    let content = header
+        .get("content")
+        .and_then(|c| c.get("pageHeaderViewModel"));
+    let Some(content) = content else { return Vec::new() };
+    let image = content.get("image");
+    let Some(image) = image else { return Vec::new() };
+
+    // Candidate paths, deepest nesting first — YT migrates the exact path
+    // occasionally and we want to keep parsing through future shuffles.
+    let candidates = [
+        image
+            .get("decoratedAvatarViewModel")
+            .and_then(|d| d.get("avatar"))
+            .and_then(|a| a.get("avatarViewModel"))
+            .and_then(|a| a.get("image"))
+            .and_then(|i| i.get("sources")),
+        image
+            .get("avatarViewModel")
+            .and_then(|a| a.get("image"))
+            .and_then(|i| i.get("sources")),
+        image.get("sources"),
+    ];
+
+    for src in candidates.into_iter().flatten() {
+        if let Some(arr) = src.as_array() {
+            let mut out = Vec::with_capacity(arr.len());
+            for s in arr {
+                let Some(url) = s.get("url").and_then(|v| v.as_str()) else { continue };
+                let w = s.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
+                let h = s.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
+                out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
+            }
+            // Sort ascending by height (meant to match parse_image_set);
+            // an empty result falls through to the next candidate path.
+            out.sort_by_key(|i| i.height());
+            if !out.is_empty() {
+                return out;
+            }
+        }
+    }
+    Vec::new()
+}
+
 fn parse_subscriber_count(text: &str) -> i64 {
     // "12.5M subscribers" / "1.2K subscribers" / "350 subscribers"
     let stripped = text