fix: handle itemSectionRenderer in recommended videos

This commit is contained in:
ThetaDev 2022-10-11 22:21:32 +02:00
parent c65af48a4b
commit eb9d3680cc
9 changed files with 18121 additions and 69 deletions

View file

@ -1018,6 +1018,9 @@ impl RustyPipeQuery {
let status = response.status();
let resp_str = response.text().await?;
// Uncomment to debug response text
// println!("{}", &resp_str);
let create_report = |level: Level, error: Option<String>, msgs: Vec<String>| {
if report {
if let Some(reporter) = &self.client.inner.reporter {

View file

@ -29,13 +29,14 @@ use serde_with::{json::JsonString, serde_as, DefaultOnError, VecSkipError};
use crate::error::ExtractionError;
use crate::model;
use crate::param::Language;
use crate::serializer::MapResult;
use crate::serializer::{
ignore_any,
text::{Text, TextComponent},
VecLogError,
};
use crate::timeago;
use crate::util;
use crate::util::TryRemove;
use crate::util::{self, TryRemove};
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
@ -74,6 +75,7 @@ pub struct Thumbnail {
pub height: u32,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum VideoListItem {
@ -91,6 +93,13 @@ pub enum VideoListItem {
/// as of 11.10.2022
RichItemRenderer { content: RichItem },
/// Seems to be currently A/B tested on the video details page,
/// as of 11.10.2022
ItemSectionRenderer {
#[serde_as(as = "VecLogError<_>")]
contents: MapResult<Vec<VideoListItem>>,
},
/// Continuation items are located at the end of a list
/// and contain the continuation token for progressive loading
#[serde(rename_all = "camelCase")]
@ -507,6 +516,10 @@ pub trait FromWLang<T> {
fn from_w_lang(from: T, lang: Language) -> Self;
}
/// Fallible, language-aware conversion from a response type `T`.
///
/// Mirrors `FromWLang`, but the conversion may be rejected: implementations
/// return a `util::MappingError` instead of a value when `from` cannot be
/// mapped.
///
/// NOTE(review): the method shares its name with `FromWLang::from_w_lang`.
/// A type implementing both traits with both in scope would presumably need
/// fully-qualified calls (`<T as TryFromWLang<_>>::from_w_lang`) — confirm
/// before adding such an impl.
pub trait TryFromWLang<T>: Sized {
/// Attempts to build `Self` from `from`, with `lang` made available to the
/// implementation for language-dependent parsing.
///
/// # Errors
///
/// Returns a `util::MappingError` if the input cannot be mapped.
fn from_w_lang(from: T, lang: Language) -> core::result::Result<Self, util::MappingError>;
}
impl FromWLang<GridVideoRenderer> for model::ChannelVideo {
fn from_w_lang(video: GridVideoRenderer, lang: Language) -> Self {
let mut toverlays = video.thumbnail_overlays;
@ -559,3 +572,39 @@ impl From<GridPlaylistRenderer> for model::ChannelPlaylist {
}
}
}
/// Maps a compact video renderer from the response onto a
/// `model::RecommendedVideo`.
impl TryFromWLang<CompactVideoRenderer> for model::RecommendedVideo {
    /// Builds a recommended video from `video`, using `lang` to interpret the
    /// textual publish date.
    ///
    /// # Errors
    ///
    /// Returns a `util::MappingError` if the renderer's channel component
    /// cannot be converted into a `model::ChannelId`. Unparseable length,
    /// publish date or view count do not fail the conversion; they fall back
    /// to `None` / the default value.
    fn from_w_lang(
        video: CompactVideoRenderer,
        lang: Language,
    ) -> core::result::Result<Self, util::MappingError> {
        // The uploader link is the only mandatory part; give up early without it.
        let owner = model::ChannelId::try_from(video.channel)?;

        let length = video
            .length_text
            .and_then(|txt| util::parse_video_length(&txt));

        // Parse the date from a borrow so the raw text can still be moved
        // into `publish_date_txt` below.
        let publish_date = video
            .published_time_text
            .as_ref()
            .and_then(|txt| timeago::parse_timeago_to_dt(lang, txt));

        let uploader = model::ChannelTag {
            id: owner.id,
            name: owner.name,
            avatar: video.channel_thumbnail.into(),
            verification: video.owner_badges.into(),
            subscriber_count: None,
        };

        Ok(Self {
            id: video.video_id,
            title: video.title,
            length,
            thumbnail: video.thumbnail.into(),
            channel: uploader,
            publish_date,
            publish_date_txt: video.published_time_text,
            view_count: video
                .view_count_text
                .and_then(|txt| util::parse_numeric(&txt).ok())
                .unwrap_or_default(),
            is_live: video.badges.is_live(),
            is_short: video.thumbnail_overlays.is_short(),
        })
    }
}

View file

@ -1,10 +1,8 @@
use std::convert::TryFrom;
use serde::Serialize;
use crate::{
error::{Error, ExtractionError},
model::{ChannelId, ChannelTag, Chapter, Comment, Paginator, RecommendedVideo, VideoDetails},
model::{ChannelTag, Chapter, Comment, Paginator, RecommendedVideo, VideoDetails},
param::Language,
serializer::MapResult,
timeago,
@ -12,7 +10,7 @@ use crate::{
};
use super::{
response::{self, IconType, IsLive, IsShort},
response::{self, IconType, TryFromWLang},
ClientType, MapResponse, QContinuation, RustyPipeQuery, YTContext,
};
@ -423,51 +421,30 @@ fn map_recommendations(
let mut warnings = r.warnings;
let mut ctoken = None;
let items =
r.c.into_iter()
.filter_map(|item| match item {
response::VideoListItem::CompactVideoRenderer(video) => {
match ChannelId::try_from(video.channel) {
Ok(channel) => Some(RecommendedVideo {
id: video.video_id,
title: video.title,
length: video.length_text.and_then(|txt| {
util::parse_video_length_or_warn(&txt, &mut warnings)
}),
thumbnail: video.thumbnail.into(),
channel: ChannelTag {
id: channel.id,
name: channel.name,
avatar: video.channel_thumbnail.into(),
verification: video.owner_badges.into(),
subscriber_count: None,
},
publish_date: video.published_time_text.as_ref().and_then(|txt| {
timeago::parse_timeago_or_warn(lang, txt, &mut warnings)
}),
publish_date_txt: video.published_time_text,
view_count: video
.view_count_text
.and_then(|txt| util::parse_numeric(&txt).ok())
.unwrap_or_default(),
is_live: video.badges.is_live(),
is_short: video.thumbnail_overlays.is_short(),
}),
Err(e) => {
warnings.push(e.to_string());
None
}
}
}
response::VideoListItem::ContinuationItemRenderer {
continuation_endpoint,
} => {
ctoken = Some(continuation_endpoint.continuation_command.token);
None
}
_ => None,
})
.collect::<Vec<_>>();
let mut items = Vec::new();
r.c.into_iter().for_each(|item| match item {
response::VideoListItem::CompactVideoRenderer(video) => {
match RecommendedVideo::from_w_lang(video, lang) {
Ok(video) => items.push(video),
Err(e) => warnings.push(e.to_string()),
}
}
response::VideoListItem::ItemSectionRenderer { contents } => {
let mut x = map_recommendations(contents, None, lang);
items.append(&mut x.c.items);
warnings.append(&mut x.warnings);
if let Some(ct) = x.c.ctoken {
ctoken = Some(ct)
}
}
response::VideoListItem::ContinuationItemRenderer {
continuation_endpoint,
} => {
ctoken = Some(continuation_endpoint.continuation_command.token);
}
_ => {}
});
if let Some(continuations) = continuations {
continuations.into_iter().for_each(|c| {
@ -586,6 +563,7 @@ mod tests {
#[case::agegate("agegate", "HRKu0cvrr_o")]
#[case::newdesc("20220924_newdesc", "ZeerrnuLi5E")]
#[case::new_cont("20221011_new_continuation", "ZeerrnuLi5E")]
#[case::no_recommends("20221011_rec_isr", "nFDBxBUfE74")]
fn map_video_details(#[case] name: &str, #[case] id: &str) {
let filename = format!("testfiles/video_details/video_details_{}.json", name);
let json_path = Path::new(&filename);
@ -593,6 +571,7 @@ mod tests {
let details: response::VideoDetails =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
dbg!(&details);
let map_res = details.map_response(id, Language::En, None).unwrap();
assert!(

View file

@ -90,8 +90,3 @@ pub enum ExtractionError {
#[error("Got no data from YouTube, attempt retry")]
Retry,
}
/// Internal error describing why a piece of response data could not be
/// mapped to a model type.
///
/// Wraps a human-readable message that is rendered as
/// `mapping error: <message>`. The field is crate-private, so the error can
/// only be constructed inside this crate.
#[derive(thiserror::Error, Debug)]
#[error("mapping error: {0}")]
pub struct MappingError(pub(crate) Cow<'static, str>);

View file

@ -5,7 +5,7 @@ use once_cell::sync::Lazy;
use serde::{Deserialize, Deserializer};
use serde_with::{serde_as, DefaultOnError, DeserializeAs};
use crate::{error::MappingError, util};
use crate::util;
/// # Text
///
@ -360,7 +360,7 @@ impl<'de> DeserializeAs<'de, TextComponents> for AttributedText {
}
impl TryFrom<TextComponent> for crate::model::ChannelId {
type Error = MappingError;
type Error = util::MappingError;
fn try_from(value: TextComponent) -> Result<Self, Self::Error> {
match value {
@ -373,9 +373,9 @@ impl TryFrom<TextComponent> for crate::model::ChannelId {
id: browse_id,
name: text,
}),
_ => Err(MappingError("invalid channel link type".into())),
_ => Err(util::MappingError("invalid channel link type".into())),
},
_ => Err(MappingError("invalid channel link".into())),
_ => Err(util::MappingError("invalid channel link".into())),
}
}
}

View file

@ -4,7 +4,11 @@ pub mod dictionary;
pub use protobuf::ProtoBuilder;
use std::{borrow::Borrow, collections::BTreeMap, str::FromStr};
use std::{
borrow::{Borrow, Cow},
collections::BTreeMap,
str::FromStr,
};
use fancy_regex::Regex;
use once_cell::sync::Lazy;
@ -16,6 +20,11 @@ use crate::{error::Error, error::Result, param::Language};
const CONTENT_PLAYBACK_NONCE_ALPHABET: &[u8; 64] =
b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
/// Internal error describing why a piece of response data could not be
/// mapped to a model type.
///
/// Wraps a human-readable message that is rendered as
/// `mapping error: <message>`. The field is crate-private, so the error can
/// only be constructed inside this crate.
#[derive(thiserror::Error, Debug)]
#[error("mapping error: {0}")]
pub struct MappingError(pub(crate) Cow<'static, str>);
/// Return the given capture group that matches first in a list of regexes
pub fn get_cg_from_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option<String>
where

File diff suppressed because one or more lines are too long

View file

@ -168,13 +168,11 @@ async fn get_playlist(
assert!(playlist.video_count > 10);
assert_eq!(playlist.video_count > 100, is_long);
assert_eq!(playlist.description, description);
match playlist.channel {
Some(c) => {
let expect = channel.unwrap();
assert_eq!(c.id, expect.0);
assert_eq!(c.name, expect.1);
}
None => assert!(channel.is_none()),
if let Some(expect) = channel {
let c = playlist.channel.unwrap();
assert_eq!(c.id, expect.0);
assert_eq!(c.name, expect.1);
}
assert!(!playlist.thumbnail.is_empty());
}
@ -439,7 +437,9 @@ async fn get_video_details_chapters() {
assert!(!details.is_live);
assert!(!details.is_ccommons);
insta::assert_ron_snapshot!(details.chapters, {
// In rare cases, YouTube does not return chapters here
if !details.chapters.is_empty() {
insta::assert_ron_snapshot!(details.chapters, {
"[].thumbnail" => insta::dynamic_redaction(move |value, _path| {
assert!(!value.as_slice().unwrap().is_empty());
"[ok]"
@ -518,6 +518,7 @@ async fn get_video_details_chapters() {
),
]
"###);
}
assert!(!details.recommended.is_exhausted());