fix: parsing errors with YouTube Music channel

This commit is contained in:
ThetaDev 2022-10-11 01:16:42 +02:00
parent 1fb4a2664e
commit 012cde8b51
3 changed files with 87 additions and 32 deletions

View file

@ -236,7 +236,9 @@ impl MapResponse<Channel<ChannelInfo>> for response::Channel {
&meta.joined_date_text,
&mut warnings,
),
view_count: util::parse_numeric_or_warn(&meta.view_count_text, &mut warnings),
view_count: meta
.view_count_text
.and_then(|txt| util::parse_numeric_or_warn(&txt, &mut warnings)),
links: meta
.primary_links
.into_iter()
@ -249,7 +251,7 @@ impl MapResponse<Channel<ChannelInfo>> for response::Channel {
.collect(),
})
.unwrap_or_else(|| {
warnings.push("no metadata".to_owned());
warnings.push("no aboutFullMetadata".to_owned());
ChannelInfo {
create_date: None,
view_count: None,
@ -424,33 +426,70 @@ fn map_channel<T>(
id: &str,
lang: Language,
) -> Result<Channel<T>, ExtractionError> {
let header = header.c4_tabbed_header_renderer;
let metadata = metadata.channel_metadata_renderer;
if header.channel_id != id {
if metadata.external_id != id {
return Err(ExtractionError::WrongResult(format!(
"got wrong channel id {}, expected {}",
header.channel_id, id
metadata.external_id, id
)));
}
Ok(Channel {
id: header.channel_id,
name: header.title,
subscriber_count: header
.subscriber_count_text
.and_then(|txt| util::parse_large_numstr(&txt, lang)),
avatar: header.avatar.into(),
description: metadata.channel_metadata_renderer.description,
tags: microformat.microformat_data_renderer.tags,
vanity_url: metadata
.channel_metadata_renderer
.vanity_channel_url
.as_ref()
.and_then(|url| map_vanity_url(url, id)),
banner: header.banner.into(),
mobile_banner: header.mobile_banner.into(),
tv_banner: header.tv_banner.into(),
content,
let vanity_url = metadata
.vanity_channel_url
.as_ref()
.and_then(|url| map_vanity_url(url, id));
Ok(match header {
response::channel::Header::C4TabbedHeaderRenderer(header) => Channel {
id: metadata.external_id,
name: metadata.title,
subscriber_count: header
.subscriber_count_text
.and_then(|txt| util::parse_large_numstr(&txt, lang)),
avatar: header.avatar.into(),
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
vanity_url,
banner: header.banner.into(),
mobile_banner: header.mobile_banner.into(),
tv_banner: header.tv_banner.into(),
content,
},
response::channel::Header::CarouselHeaderRenderer(carousel) => {
let hdata = carousel
.contents
.into_iter()
.filter_map(|item| {
match item {
response::channel::CarouselHeaderRendererItem::TopicChannelDetailsRenderer {
subscriber_count_text,
avatar,
} => Some((subscriber_count_text, avatar)),
response::channel::CarouselHeaderRendererItem::None => None,
}
})
.next();
Channel {
id: metadata.external_id,
name: metadata.title,
subscriber_count: hdata.as_ref().and_then(|hdata| {
hdata
.0
.as_ref()
.and_then(|txt| util::parse_large_numstr(txt, lang))
}),
avatar: hdata.map(|hdata| hdata.1.into()).unwrap_or_default(),
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
vanity_url,
banner: Vec::new(),
mobile_banner: Vec::new(),
tv_banner: Vec::new(),
content,
}
}
})
}

View file

@ -85,17 +85,16 @@ pub enum ChannelContent {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Header {
pub c4_tabbed_header_renderer: HeaderRenderer,
pub enum Header {
C4TabbedHeaderRenderer(HeaderRenderer),
/// Used for special channels like YouTube Music
CarouselHeaderRenderer(ContentsRenderer<CarouselHeaderRendererItem>),
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct HeaderRenderer {
pub channel_id: String,
/// Channel name
pub title: String,
/// Approximate subscriber count (e.g. `880K subscribers`), depends on language.
///
/// `None` if the subscriber count is hidden.
@ -114,6 +113,21 @@ pub struct HeaderRenderer {
pub tv_banner: Thumbnails,
}
/// One entry of the `contents` list of a carousel channel header
/// (the header variant used for special channels like YouTube Music).
///
/// Only the topic channel details entry is mapped; every other entry type
/// is deserialized into [`CarouselHeaderRendererItem::None`].
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum CarouselHeaderRendererItem {
/// Entry carrying the channel's subscriber count text and avatar.
#[serde(rename_all = "camelCase")]
TopicChannelDetailsRenderer {
/// Subscriber count as displayed text; parsed into a number by the
/// channel mapper depending on the language.
///
/// `None` if the field is absent from the response.
#[serde_as(as = "Option<Text>")]
subscriber_count_text: Option<String>,
/// Channel avatar thumbnails; falls back to the default value when
/// the field is missing.
#[serde(default)]
avatar: Thumbnails,
},
/// Fallback for any entry type that is not explicitly listed above.
// NOTE(review): `ignore_any` is defined elsewhere; presumably it discards
// the variant's content so unknown entries do not fail deserialization.
#[serde(other, deserialize_with = "ignore_any")]
None,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
@ -123,6 +137,9 @@ pub struct Metadata {
/// Channel metadata block of the channel response.
///
/// The channel mapper takes the channel ID, name, description and vanity URL
/// from here, independent of which header variant the response contains.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ChannelMetadataRenderer {
/// Channel name
pub title: String,
/// Channel ID
///
/// Compared against the requested ID to detect wrong results.
pub external_id: String,
/// Channel description text
pub description: String,
/// Vanity URL of the channel, if the response contains one
pub vanity_channel_url: Option<String>,
}
@ -146,8 +163,8 @@ pub struct MicroformatDataRenderer {
pub struct ChannelFullMetadata {
#[serde_as(as = "Text")]
pub joined_date_text: String,
#[serde_as(as = "Text")]
pub view_count_text: String,
#[serde_as(as = "Option<Text>")]
pub view_count_text: Option<String>,
#[serde(default)]
#[serde_as(as = "VecSkipError<_>")]
pub primary_links: Vec<PrimaryLink>,

View file

@ -819,8 +819,7 @@ fn assert_channel_eevblog<T>(channel: &Channel<T>) {
true,
true
)]
// TODO: fix YouTube Music extraction error
// #[case::music("UC-9-kyTW8ZkZNDHQJ6FgpwQ", "Music", false, false)]
#[case::music("UC-9-kyTW8ZkZNDHQJ6FgpwQ", "Music", false, false)]
#[tokio::test]
async fn channel_more(
#[case] id: &str,