fix: Handle trimmed channel ID from RSS feed
This commit is contained in:
parent
1ec1666d77
commit
cced125390
4 changed files with 1474 additions and 65 deletions
|
|
@ -4,6 +4,7 @@ use crate::{
|
|||
error::{Error, ExtractionError},
|
||||
model::ChannelRss,
|
||||
report::{Report, RustyPipeInfo},
|
||||
util,
|
||||
};
|
||||
|
||||
use super::{response, RustyPipeQuery};
|
||||
|
|
@ -36,8 +37,11 @@ impl RustyPipeQuery {
|
|||
_ => e,
|
||||
})?;
|
||||
|
||||
match quick_xml::de::from_str::<response::ChannelRss>(&xml) {
|
||||
Ok(feed) => Ok(feed.into()),
|
||||
match quick_xml::de::from_str::<response::ChannelRss>(&xml)
|
||||
.map_err(|e| ExtractionError::InvalidData(e.to_string().into()))
|
||||
.and_then(|feed| feed.map_response(channel_id))
|
||||
{
|
||||
Ok(res) => Ok(res),
|
||||
Err(e) => {
|
||||
if let Some(reporter) = &self.client.inner.reporter {
|
||||
let report = Report {
|
||||
|
|
@ -59,38 +63,94 @@ impl RustyPipeQuery {
|
|||
|
||||
reporter.report(&report);
|
||||
}
|
||||
|
||||
Err(
|
||||
ExtractionError::InvalidData(format!("could not deserialize xml: {e}").into())
|
||||
.into(),
|
||||
)
|
||||
Err(Error::Extraction(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl response::ChannelRss {
|
||||
fn map_response(self, id: &str) -> Result<ChannelRss, ExtractionError> {
|
||||
let channel_id = if self.channel_id.is_empty() {
|
||||
self.entry
|
||||
.iter()
|
||||
.find_map(|entry| {
|
||||
Some(entry.channel_id.as_str())
|
||||
.filter(|id| id.is_empty())
|
||||
.map(str::to_owned)
|
||||
})
|
||||
.or_else(|| {
|
||||
self.author
|
||||
.uri
|
||||
.strip_prefix("https://www.youtube.com/channel/")
|
||||
.and_then(|id| {
|
||||
if util::CHANNEL_ID_REGEX.is_match(id) {
|
||||
Some(id.to_owned())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.ok_or(ExtractionError::InvalidData(
|
||||
"could not get channel id".into(),
|
||||
))?
|
||||
} else if self.channel_id.len() == 22 {
|
||||
// As of November 2023, YouTube seems to output channel IDs without the UC prefix
|
||||
format!("UC{}", self.channel_id)
|
||||
} else {
|
||||
self.channel_id
|
||||
};
|
||||
|
||||
if channel_id != id {
|
||||
return Err(ExtractionError::WrongResult(format!(
|
||||
"got wrong channel id {channel_id}, expected {id}",
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(ChannelRss {
|
||||
id: channel_id,
|
||||
name: self.title,
|
||||
videos: self
|
||||
.entry
|
||||
.into_iter()
|
||||
.map(|item| crate::model::ChannelRssVideo {
|
||||
id: item.video_id,
|
||||
name: item.title,
|
||||
description: item.media_group.description,
|
||||
thumbnail: item.media_group.thumbnail.into(),
|
||||
publish_date: item.published,
|
||||
update_date: item.updated,
|
||||
view_count: item.media_group.community.statistics.views,
|
||||
like_count: item.media_group.community.rating.count,
|
||||
})
|
||||
.collect(),
|
||||
create_date: self.create_date,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::{fs::File, io::BufReader};

    use crate::{client::response, util::tests::TESTFILES};

    use path_macro::path;
    use rstest::rstest;

    /// Deserialize a stored channel RSS feed, map it with the expected channel ID
    /// and compare the result against the stored snapshot.
    ///
    /// Each case names a file under `channel_rss/` in the test file directory and
    /// the channel ID that `map_response` has to recover from it.
    #[rstest]
    #[case::base("base", "UCHnyfMqiRRG1u-2MsSQLbXA")]
    #[case::no_likes("no_likes", "UCdfxp4cUWsWryZOy-o427dw")]
    #[case::no_channel_id("no_channel_id", "UCHnyfMqiRRG1u-2MsSQLbXA")]
    #[case::trimmed_channel_id("trimmed_channel_id", "UCHnyfMqiRRG1u-2MsSQLbXA")]
    fn map_channel_rss(#[case] name: &str, #[case] id: &str) {
        let xml_path = path!(*TESTFILES / "channel_rss" / format!("{}.xml", name));
        let xml_file = File::open(xml_path).unwrap();

        let feed: response::ChannelRss =
            quick_xml::de::from_reader(BufReader::new(xml_file)).unwrap();

        // Mapping fails if the recovered channel ID does not equal `id`.
        let map_res = feed.map_response(id).unwrap();
        insta::assert_ron_snapshot!(format!("map_channel_rss_{}", name), map_res);
    }
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
use serde::Deserialize;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::util;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub(crate) struct ChannelRss {
|
||||
#[serde(rename = "channelId")]
|
||||
|
|
@ -80,52 +78,3 @@ impl From<Thumbnail> for crate::model::Thumbnail {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ChannelRss> for crate::model::ChannelRss {
|
||||
fn from(feed: ChannelRss) -> Self {
|
||||
let id = if feed.channel_id.is_empty() {
|
||||
feed.entry
|
||||
.iter()
|
||||
.find_map(|entry| {
|
||||
Some(entry.channel_id.as_str())
|
||||
.filter(|id| id.is_empty())
|
||||
.map(str::to_owned)
|
||||
})
|
||||
.or_else(|| {
|
||||
feed.author
|
||||
.uri
|
||||
.strip_prefix("https://www.youtube.com/channel/")
|
||||
.and_then(|id| {
|
||||
if util::CHANNEL_ID_REGEX.is_match(id) {
|
||||
Some(id.to_owned())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
feed.channel_id
|
||||
};
|
||||
|
||||
Self {
|
||||
id,
|
||||
name: feed.title,
|
||||
videos: feed
|
||||
.entry
|
||||
.into_iter()
|
||||
.map(|item| crate::model::ChannelRssVideo {
|
||||
id: item.video_id,
|
||||
name: item.title,
|
||||
description: item.media_group.description,
|
||||
thumbnail: item.media_group.thumbnail.into(),
|
||||
publish_date: item.published,
|
||||
update_date: item.updated,
|
||||
view_count: item.media_group.community.statistics.views,
|
||||
like_count: item.media_group.community.rating.count,
|
||||
})
|
||||
.collect(),
|
||||
create_date: feed.create_date,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
1179
testfiles/channel_rss/trimmed_channel_id.xml
Normal file
1179
testfiles/channel_rss/trimmed_channel_id.xml
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue