From 012cde8b518a47a352cb2a27709a90cf1c7441b7 Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Tue, 11 Oct 2022 01:16:42 +0200 Subject: [PATCH] fix: parsing errors with YouTube Music channel --- src/client/channel.rs | 85 +++++++++++++++++++++++++--------- src/client/response/channel.rs | 31 ++++++++++--- tests/youtube.rs | 3 +- 3 files changed, 87 insertions(+), 32 deletions(-) diff --git a/src/client/channel.rs b/src/client/channel.rs index a240b83..61e2db8 100644 --- a/src/client/channel.rs +++ b/src/client/channel.rs @@ -236,7 +236,9 @@ impl MapResponse> for response::Channel { &meta.joined_date_text, &mut warnings, ), - view_count: util::parse_numeric_or_warn(&meta.view_count_text, &mut warnings), + view_count: meta + .view_count_text + .and_then(|txt| util::parse_numeric_or_warn(&txt, &mut warnings)), links: meta .primary_links .into_iter() @@ -249,7 +251,7 @@ impl MapResponse> for response::Channel { .collect(), }) .unwrap_or_else(|| { - warnings.push("no metadata".to_owned()); + warnings.push("no aboutFullMetadata".to_owned()); ChannelInfo { create_date: None, view_count: None, @@ -424,33 +426,70 @@ fn map_channel( id: &str, lang: Language, ) -> Result, ExtractionError> { - let header = header.c4_tabbed_header_renderer; + let metadata = metadata.channel_metadata_renderer; - if header.channel_id != id { + if metadata.external_id != id { return Err(ExtractionError::WrongResult(format!( "got wrong channel id {}, expected {}", - header.channel_id, id + metadata.external_id, id ))); } - Ok(Channel { - id: header.channel_id, - name: header.title, - subscriber_count: header - .subscriber_count_text - .and_then(|txt| util::parse_large_numstr(&txt, lang)), - avatar: header.avatar.into(), - description: metadata.channel_metadata_renderer.description, - tags: microformat.microformat_data_renderer.tags, - vanity_url: metadata - .channel_metadata_renderer - .vanity_channel_url - .as_ref() - .and_then(|url| map_vanity_url(url, id)), - banner: header.banner.into(), - mobile_banner: header.mobile_banner.into(), - tv_banner: header.tv_banner.into(), - content, + let vanity_url = metadata + .vanity_channel_url + .as_ref() + .and_then(|url| map_vanity_url(url, id)); + + Ok(match header { + response::channel::Header::C4TabbedHeaderRenderer(header) => Channel { + id: metadata.external_id, + name: metadata.title, + subscriber_count: header + .subscriber_count_text + .and_then(|txt| util::parse_large_numstr(&txt, lang)), + avatar: header.avatar.into(), + description: metadata.description, + tags: microformat.microformat_data_renderer.tags, + vanity_url, + banner: header.banner.into(), + mobile_banner: header.mobile_banner.into(), + tv_banner: header.tv_banner.into(), + content, + }, + response::channel::Header::CarouselHeaderRenderer(carousel) => { + let hdata = carousel + .contents + .into_iter() + .filter_map(|item| { + match item { + response::channel::CarouselHeaderRendererItem::TopicChannelDetailsRenderer { + subscriber_count_text, + avatar, + } => Some((subscriber_count_text, avatar)), + response::channel::CarouselHeaderRendererItem::None => None, + } + }) + .next(); + + Channel { + id: metadata.external_id, + name: metadata.title, + subscriber_count: hdata.as_ref().and_then(|hdata| { + hdata + .0 + .as_ref() + .and_then(|txt| util::parse_large_numstr(txt, lang)) + }), + avatar: hdata.map(|hdata| hdata.1.into()).unwrap_or_default(), + description: metadata.description, + tags: microformat.microformat_data_renderer.tags, + vanity_url, + banner: Vec::new(), + mobile_banner: Vec::new(), + tv_banner: Vec::new(), + content, + } + } }) } diff --git a/src/client/response/channel.rs b/src/client/response/channel.rs index 9f38a03..0cb6671 100644 --- a/src/client/response/channel.rs +++ b/src/client/response/channel.rs @@ -85,17 +85,16 @@ pub enum ChannelContent { #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] -pub struct Header { - pub c4_tabbed_header_renderer: HeaderRenderer, +pub enum Header { + C4TabbedHeaderRenderer(HeaderRenderer), + /// Used for special channels like YouTube Music + CarouselHeaderRenderer(ContentsRenderer), } #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub struct HeaderRenderer { - pub channel_id: String, - /// Channel name - pub title: String, /// Approximate subscriber count (e.g. `880K subscribers`), depends on language. /// /// `None` if the subscriber count is hidden. @@ -114,6 +113,21 @@ pub struct HeaderRenderer { pub tv_banner: Thumbnails, } +#[serde_as] +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub enum CarouselHeaderRendererItem { + #[serde(rename_all = "camelCase")] + TopicChannelDetailsRenderer { + #[serde_as(as = "Option")] + subscriber_count_text: Option, + #[serde(default)] + avatar: Thumbnails, + }, + #[serde(other, deserialize_with = "ignore_any")] + None, +} + #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Metadata { @@ -123,6 +137,9 @@ pub struct Metadata { #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ChannelMetadataRenderer { + pub title: String, + /// Channel ID + pub external_id: String, pub description: String, pub vanity_channel_url: Option, } @@ -146,8 +163,8 @@ pub struct MicroformatDataRenderer { pub struct ChannelFullMetadata { #[serde_as(as = "Text")] pub joined_date_text: String, - #[serde_as(as = "Text")] - pub view_count_text: String, + #[serde_as(as = "Option")] + pub view_count_text: Option, #[serde(default)] #[serde_as(as = "VecSkipError<_>")] pub primary_links: Vec, diff --git a/tests/youtube.rs b/tests/youtube.rs index 9e50617..90344fa 100644 --- a/tests/youtube.rs +++ b/tests/youtube.rs @@ -819,8 +819,7 @@ fn assert_channel_eevblog(channel: &Channel) { true, true )] -// TODO: fix YouTube Music extraction error -// #[case::music("UC-9-kyTW8ZkZNDHQJ6FgpwQ", "Music", false, false)] +#[case::music("UC-9-kyTW8ZkZNDHQJ6FgpwQ", "Music", false, false)] #[tokio::test] async fn channel_more( #[case] id: &str,