fix: Handle trimmed channel ID from RSS feed

This commit is contained in:
ThetaDev 2023-11-03 16:16:53 +01:00
parent 1ec1666d77
commit cced125390
4 changed files with 1474 additions and 65 deletions

View file

@ -4,6 +4,7 @@ use crate::{
error::{Error, ExtractionError}, error::{Error, ExtractionError},
model::ChannelRss, model::ChannelRss,
report::{Report, RustyPipeInfo}, report::{Report, RustyPipeInfo},
util,
}; };
use super::{response, RustyPipeQuery}; use super::{response, RustyPipeQuery};
@ -36,8 +37,11 @@ impl RustyPipeQuery {
_ => e, _ => e,
})?; })?;
match quick_xml::de::from_str::<response::ChannelRss>(&xml) { match quick_xml::de::from_str::<response::ChannelRss>(&xml)
Ok(feed) => Ok(feed.into()), .map_err(|e| ExtractionError::InvalidData(e.to_string().into()))
.and_then(|feed| feed.map_response(channel_id))
{
Ok(res) => Ok(res),
Err(e) => { Err(e) => {
if let Some(reporter) = &self.client.inner.reporter { if let Some(reporter) = &self.client.inner.reporter {
let report = Report { let report = Report {
@ -59,38 +63,94 @@ impl RustyPipeQuery {
reporter.report(&report); reporter.report(&report);
} }
Err(Error::Extraction(e))
Err(
ExtractionError::InvalidData(format!("could not deserialize xml: {e}").into())
.into(),
)
} }
} }
} }
} }
impl response::ChannelRss {
    /// Converts the deserialized RSS feed into the public [`ChannelRss`] model.
    ///
    /// The channel ID is taken from the first source that provides one, in this order:
    /// 1. the feed-level channel ID,
    /// 2. the first entry that carries a non-empty channel ID,
    /// 3. the author URI, if it has the form `https://www.youtube.com/channel/<id>`
    ///    and `<id>` matches `util::CHANNEL_ID_REGEX`.
    ///
    /// IDs from the first two sources that are exactly 22 characters long get their
    /// `UC` prefix restored before being compared against `id`.
    ///
    /// # Errors
    ///
    /// - [`ExtractionError::InvalidData`] if no channel ID could be determined.
    /// - [`ExtractionError::WrongResult`] if the resolved channel ID differs from `id`.
    fn map_response(self, id: &str) -> Result<ChannelRss, ExtractionError> {
        /// Restores the `UC` prefix on a trimmed (22 character) channel ID.
        fn restore_prefix(raw: String) -> String {
            // As of November 2023, YouTube seems to output channel IDs without the UC prefix
            if raw.len() == 22 {
                format!("UC{raw}")
            } else {
                raw
            }
        }

        let channel_id = if self.channel_id.is_empty() {
            self.entry
                .iter()
                .map(|entry| entry.channel_id.as_str())
                // Skip entries without an ID; an empty ID is never a usable result.
                .find(|entry_id| !entry_id.is_empty())
                .map(|entry_id| restore_prefix(entry_id.to_owned()))
                .or_else(|| {
                    // Last resort: extract the ID from the author's channel URL.
                    self.author
                        .uri
                        .strip_prefix("https://www.youtube.com/channel/")
                        .filter(|uri_id| util::CHANNEL_ID_REGEX.is_match(uri_id))
                        .map(str::to_owned)
                })
                .ok_or_else(|| ExtractionError::InvalidData("could not get channel id".into()))?
        } else {
            restore_prefix(self.channel_id)
        };

        // Guard against a feed that belongs to a different channel than the one requested.
        if channel_id != id {
            return Err(ExtractionError::WrongResult(format!(
                "got wrong channel id {channel_id}, expected {id}",
            )));
        }

        Ok(ChannelRss {
            id: channel_id,
            name: self.title,
            videos: self
                .entry
                .into_iter()
                .map(|item| crate::model::ChannelRssVideo {
                    id: item.video_id,
                    name: item.title,
                    description: item.media_group.description,
                    thumbnail: item.media_group.thumbnail.into(),
                    publish_date: item.published,
                    update_date: item.updated,
                    view_count: item.media_group.community.statistics.views,
                    like_count: item.media_group.community.rating.count,
                })
                .collect(),
            create_date: self.create_date,
        })
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::{fs::File, io::BufReader}; use std::{fs::File, io::BufReader};
use crate::{client::response, model::ChannelRss, util::tests::TESTFILES}; use crate::{client::response, util::tests::TESTFILES};
use path_macro::path; use path_macro::path;
use rstest::rstest; use rstest::rstest;
#[rstest] #[rstest]
#[case::base("base")] #[case::base("base", "UCHnyfMqiRRG1u-2MsSQLbXA")]
#[case::no_likes("no_likes")] #[case::no_likes("no_likes", "UCdfxp4cUWsWryZOy-o427dw")]
#[case::no_channel_id("no_channel_id")] #[case::no_channel_id("no_channel_id", "UCHnyfMqiRRG1u-2MsSQLbXA")]
fn map_channel_rss(#[case] name: &str) { #[case::trimmed_channel_id("trimmed_channel_id", "UCHnyfMqiRRG1u-2MsSQLbXA")]
fn map_channel_rss(#[case] name: &str, #[case] id: &str) {
let xml_path = path!(*TESTFILES / "channel_rss" / format!("{}.xml", name)); let xml_path = path!(*TESTFILES / "channel_rss" / format!("{}.xml", name));
let xml_file = File::open(xml_path).unwrap(); let xml_file = File::open(xml_path).unwrap();
let feed: response::ChannelRss = let feed: response::ChannelRss =
quick_xml::de::from_reader(BufReader::new(xml_file)).unwrap(); quick_xml::de::from_reader(BufReader::new(xml_file)).unwrap();
let map_res: ChannelRss = feed.into(); let map_res = feed.map_response(id).unwrap();
insta::assert_ron_snapshot!(format!("map_channel_rss_{}", name), map_res); insta::assert_ron_snapshot!(format!("map_channel_rss_{}", name), map_res);
} }
} }

View file

@ -1,8 +1,6 @@
use serde::Deserialize; use serde::Deserialize;
use time::OffsetDateTime; use time::OffsetDateTime;
use crate::util;
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
pub(crate) struct ChannelRss { pub(crate) struct ChannelRss {
#[serde(rename = "channelId")] #[serde(rename = "channelId")]
@ -80,52 +78,3 @@ impl From<Thumbnail> for crate::model::Thumbnail {
} }
} }
} }
impl From<ChannelRss> for crate::model::ChannelRss {
    /// Converts the deserialized RSS feed into the public channel RSS model.
    ///
    /// The channel ID is taken from the first source that provides one, in this order:
    /// 1. the feed-level channel ID,
    /// 2. the first entry that carries a non-empty channel ID,
    /// 3. the author URI, if it has the form `https://www.youtube.com/channel/<id>`
    ///    and `<id>` matches `util::CHANNEL_ID_REGEX`.
    ///
    /// IDs from the first two sources that are exactly 22 characters long get their
    /// `UC` prefix restored. If no source yields an ID, the resulting `id` is empty.
    fn from(feed: ChannelRss) -> Self {
        /// Restores the `UC` prefix on a trimmed (22 character) channel ID.
        fn restore_prefix(raw: String) -> String {
            if raw.len() == 22 {
                format!("UC{raw}")
            } else {
                raw
            }
        }

        let id = if feed.channel_id.is_empty() {
            feed.entry
                .iter()
                .map(|entry| entry.channel_id.as_str())
                // Skip entries without an ID; an empty ID is never a usable result.
                .find(|entry_id| !entry_id.is_empty())
                .map(|entry_id| restore_prefix(entry_id.to_owned()))
                .or_else(|| {
                    // Last resort: extract the ID from the author's channel URL.
                    feed.author
                        .uri
                        .strip_prefix("https://www.youtube.com/channel/")
                        .filter(|uri_id| util::CHANNEL_ID_REGEX.is_match(uri_id))
                        .map(str::to_owned)
                })
                .unwrap_or_default()
        } else {
            restore_prefix(feed.channel_id)
        };

        Self {
            id,
            name: feed.title,
            videos: feed
                .entry
                .into_iter()
                .map(|item| crate::model::ChannelRssVideo {
                    id: item.video_id,
                    name: item.title,
                    description: item.media_group.description,
                    thumbnail: item.media_group.thumbnail.into(),
                    publish_date: item.published,
                    update_date: item.updated,
                    view_count: item.media_group.community.statistics.views,
                    like_count: item.media_group.community.rating.count,
                })
                .collect(),
            create_date: feed.create_date,
        }
    }
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff