channel: extract avatar from pageHeaderRenderer + metadata fallback
Channels on the newer pageHeaderRenderer layout (most channels with a
2024+ refreshed header — WTYP, etc.) were getting empty avatars and
banners because parse_channel_browse only extracted those from the
older c4TabbedHeaderRenderer branch.
Two fixes layered:
1. parse_page_header_avatar() — walks the deep ViewModel nest:
header.content.pageHeaderViewModel.image
.decoratedAvatarViewModel.avatar.avatarViewModel.image.sources[]
Falls back to a couple of shallower nestings YT has used on this
path historically. Returns ImageSet sorted by height ascending so
.last() still picks the largest source.
2. metadata.channelMetadataRenderer.avatar.thumbnails[] backfill.
Present whether the header is c4Tabbed or pageHeader, which makes it
the most reliable single avatar source. Used only when both header
branches came back empty, so we don't override a higher-quality
header avatar.
Description-from-metadata extraction is folded into the same metadata
walk to avoid walking the JSON tree twice.
This commit is contained in:
parent
e6fbbb79b4
commit
7c7151186e
1 changed files with 76 additions and 8 deletions
|
|
@ -174,7 +174,11 @@ pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
|
|||
});
|
||||
}
|
||||
}
|
||||
// Alternative pageHeaderRenderer (newer flavor — 2025+)
|
||||
// Alternative pageHeaderRenderer (newer flavor — 2025+).
|
||||
// Avatar nests deep under decoratedAvatarViewModel.avatar.avatarViewModel
|
||||
// .image.sources[]; banner under contentBanner...image.sources[].
|
||||
// YT keeps adding wrappers in this branch — walk all the known
|
||||
// intermediates and parse the first sources[] we find.
|
||||
else if let Some(header) = body
|
||||
.get("header")
|
||||
.and_then(|h| h.get("pageHeaderRenderer"))
|
||||
|
|
@ -182,16 +186,27 @@ pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
|
|||
if let Some(s) = header.get("pageTitle").and_then(|t| t.as_str()) {
|
||||
info.name = s.to_string();
|
||||
}
|
||||
if info.avatars.is_empty() {
|
||||
info.avatars = parse_page_header_avatar(header);
|
||||
}
|
||||
}
|
||||
|
||||
// microformat / description
|
||||
if let Some(desc) = body
|
||||
// microformat / description / avatar fallback. metadata.channel
|
||||
// MetadataRenderer.avatar.thumbnails[] is the most reliable avatar
|
||||
// path — present whether the header is c4TabbedHeaderRenderer or
|
||||
// pageHeaderRenderer. Use it as a last-resort backfill so newer
|
||||
// channel layouts that don't expose the avatar in the header still
|
||||
// give us SOMETHING.
|
||||
let metadata = body
|
||||
.get("metadata")
|
||||
.and_then(|m| m.get("channelMetadataRenderer"))
|
||||
.and_then(|m| m.get("description"))
|
||||
.and_then(|d| d.as_str())
|
||||
{
|
||||
info.description = desc.to_string();
|
||||
.and_then(|m| m.get("channelMetadataRenderer"));
|
||||
if let Some(m) = metadata {
|
||||
if let Some(desc) = m.get("description").and_then(|d| d.as_str()) {
|
||||
info.description = desc.to_string();
|
||||
}
|
||||
if info.avatars.is_empty() {
|
||||
info.avatars = parse_image_set(m.get("avatar"));
|
||||
}
|
||||
}
|
||||
|
||||
// Note: recent_videos are populated by a separate second browse to
|
||||
|
|
@ -461,6 +476,59 @@ fn parse_image_set(value: Option<&Value>) -> ImageSet {
|
|||
out
|
||||
}
|
||||
|
||||
/// Avatar extraction for the newer pageHeaderRenderer flavor.
|
||||
///
|
||||
/// Walks `header.content.pageHeaderViewModel.image` and finds the first
|
||||
/// `sources[]` array it can — that lives under either
|
||||
/// `decoratedAvatarViewModel.avatar.avatarViewModel.image.sources` or,
|
||||
/// on some channels, the slightly shallower
|
||||
/// `avatarViewModel.image.sources`. Returns ImageSet ordered by source
|
||||
/// height ascending — matches what parse_image_set produces for the
|
||||
/// legacy `thumbnails[]` path, so .last() still gives the largest one.
|
||||
fn parse_page_header_avatar(header: &Value) -> ImageSet {
|
||||
let content = header
|
||||
.get("content")
|
||||
.and_then(|c| c.get("pageHeaderViewModel"));
|
||||
let Some(content) = content else { return Vec::new() };
|
||||
let image = content.get("image");
|
||||
let Some(image) = image else { return Vec::new() };
|
||||
|
||||
// Try a couple of nestings — YT migrates the exact path occasionally
|
||||
// and we want to keep parsing through future shuffles.
|
||||
let candidates = [
|
||||
image
|
||||
.get("decoratedAvatarViewModel")
|
||||
.and_then(|d| d.get("avatar"))
|
||||
.and_then(|a| a.get("avatarViewModel"))
|
||||
.and_then(|a| a.get("image"))
|
||||
.and_then(|i| i.get("sources")),
|
||||
image
|
||||
.get("avatarViewModel")
|
||||
.and_then(|a| a.get("image"))
|
||||
.and_then(|i| i.get("sources")),
|
||||
image.get("sources"),
|
||||
];
|
||||
|
||||
for src in candidates.into_iter().flatten() {
|
||||
if let Some(arr) = src.as_array() {
|
||||
let mut out = Vec::with_capacity(arr.len());
|
||||
for s in arr {
|
||||
let Some(url) = s.get("url").and_then(|v| v.as_str()) else { continue };
|
||||
let w = s.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
|
||||
let h = s.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
|
||||
out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
|
||||
}
|
||||
// Match parse_image_set ordering — caller uses .last() for
|
||||
// the largest size.
|
||||
out.sort_by_key(|i| i.height());
|
||||
if !out.is_empty() {
|
||||
return out;
|
||||
}
|
||||
}
|
||||
}
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
fn parse_subscriber_count(text: &str) -> i64 {
|
||||
// "12.5M subscribers" / "1.2K subscribers" / "350 subscribers"
|
||||
let stripped = text
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue