fix: parsing videos using LockupViewModel (Music video recommendations)

This commit is contained in:
ThetaDev 2024-11-10 03:05:48 +01:00
parent e1e1687605
commit 870ff79ee0
No known key found for this signature in database
GPG key ID: E319D3C5148D65B6
7 changed files with 308 additions and 158 deletions

View file

@ -9,7 +9,7 @@ use crate::{
error::{Error, ExtractionError},
model::{
paginator::{ContinuationEndpoint, Paginator},
Channel, ChannelInfo, PlaylistItem, VideoItem,
Channel, ChannelInfo, PlaylistItem, Verification, VideoItem,
},
param::{ChannelOrder, ChannelVideoTab, Language},
serializer::{text::TextComponent, MapResult},
@ -489,7 +489,7 @@ fn map_channel(
.avatar_view_model
.image
.into(),
verification: hdata.title.into(),
verification: hdata.title.map(Verification::from).unwrap_or_default(),
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
banner: hdata.banner.image_banner_view_model.image.into(),

View file

@ -2,11 +2,14 @@ use serde::Deserialize;
use serde_with::{rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError};
use super::{
video_item::YouTubeListRenderer, Alert, ChannelBadge, ContentRenderer, ContentsRenderer,
ContinuationActionWrap, ImageView, PageHeaderRendererContent, PhMetadataView, ResponseContext,
Thumbnails, TwoColumnBrowseResults,
video_item::YouTubeListRenderer, Alert, AttachmentRun, ChannelBadge, ContentRenderer,
ContentsRenderer, ContinuationActionWrap, ImageView, PageHeaderRendererContent, PhMetadataView,
ResponseContext, Thumbnails, TwoColumnBrowseResults,
};
use crate::{
model::Verification,
serializer::text::{AttributedText, Text, TextComponent},
};
use crate::serializer::text::{AttributedText, Text, TextComponent};
#[serde_as]
#[derive(Debug, Deserialize)]
@ -121,7 +124,7 @@ pub(crate) enum CarouselHeaderRendererItem {
pub(crate) struct PageHeaderRendererInner {
/// Channel title (only used to extract verification badges)
#[serde_as(as = "DefaultOnError")]
pub title: PhTitleView,
pub title: Option<PhTitleView>,
/// Channel avatar
pub image: PhAvatarView,
/// Channel metadata (subscribers, video count)
@ -130,7 +133,7 @@ pub(crate) struct PageHeaderRendererInner {
pub banner: PhBannerView,
}
#[derive(Default, Debug, Deserialize)]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhTitleView {
pub dynamic_text_view_model: PhTitleView2,
@ -150,58 +153,6 @@ pub(crate) struct PhTitleView3 {
pub attachment_runs: Vec<AttachmentRun>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRun {
pub element: AttachmentRunElement,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElement {
#[serde(rename = "type")]
pub typ: AttachmentRunElementType,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementType {
pub image_type: AttachmentRunElementImageType,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImageType {
pub image: AttachmentRunElementImage,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImage {
#[serde_as(as = "VecSkipError<_>")]
pub sources: Vec<AttachmentRunElementImageSource>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImageSource {
pub client_resource: ClientResource,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ClientResource {
pub image_name: IconName,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub(crate) enum IconName {
CheckCircleFilled,
MusicFilled,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhAvatarView {
@ -330,15 +281,9 @@ impl From<PhTitleView> for crate::model::Verification {
.dynamic_text_view_model
.text
.attachment_runs
.iter()
.find_map(|r| {
r.element.typ.image_type.image.sources.first().map(|s| {
match s.client_resource.image_name {
IconName::CheckCircleFilled => crate::model::Verification::Verified,
IconName::MusicFilled => crate::model::Verification::Artist,
}
})
})
.into_iter()
.next()
.map(Verification::from)
.unwrap_or_default()
}
}

View file

@ -199,12 +199,73 @@ pub(crate) struct TextBox {
pub text: String,
}
/// Wrapper object holding a single `text` field.
///
/// The field is deserialized through `AttributedText` into a `TextComponent`.
/// Used as the payload of `MetadataPart::AvatarStack`
/// (`avatar_stack_view_model`).
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct TextComponentBox {
/// Text content, converted from its attributed-text representation
#[serde_as(deserialize_as = "AttributedText")]
pub text: TextComponent,
}
/// Response context object of an API response.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ResponseContext {
/// Optional visitor data string (JSON key `visitorData`)
pub visitor_data: Option<String>,
}
/// One entry of an `attachment_runs` list (see `PhTitleView3`).
///
/// Can be converted into a `Verification` through the `From<AttachmentRun>`
/// impl, which looks at the icon name of the first image source.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRun {
/// The attached element (JSON key `element`)
pub element: AttachmentRunElement,
}
/// Element of an [`AttachmentRun`].
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElement {
/// Element type; read from the JSON key `type`, which is a Rust keyword and
/// therefore stored under the field name `typ`
#[serde(rename = "type")]
pub typ: AttachmentRunElementType,
}
/// Type object of an [`AttachmentRunElement`]; only the image type is modelled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementType {
/// Image type payload (JSON key `imageType`)
pub image_type: AttachmentRunElementImageType,
}
/// Image type payload of an attachment run element, wrapping the image itself.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImageType {
/// The image (JSON key `image`)
pub image: AttachmentRunElementImage,
}
/// Image of an attachment run element, described by a list of sources.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImage {
/// Image sources. Entries that fail to deserialize (for example a source
/// whose icon name is not a known [`IconName`]) are skipped by
/// `VecSkipError` instead of failing the whole list.
#[serde_as(as = "VecSkipError<_>")]
pub sources: Vec<AttachmentRunElementImageSource>,
}
/// A single image source of an [`AttachmentRunElementImage`].
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImageSource {
/// Client-side resource the source refers to (JSON key `clientResource`)
pub client_resource: ClientResource,
}
/// Client resource reference identifying an icon by name.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ClientResource {
/// Name of the icon (JSON key `imageName`)
pub image_name: IconName,
}
/// Icon names recognized in attachment run images.
///
/// Deserialized from SCREAMING_SNAKE_CASE strings. The `From<AttachmentRun>`
/// impl for `Verification` maps `CheckCircleFilled` to `Verified` and
/// `MusicFilled` to `Artist`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum IconName {
/// `CHECK_CIRCLE_FILLED`
CheckCircleFilled,
/// `MUSIC_FILLED`; the name `AUDIO_BADGE` is accepted as an alias
#[serde(alias = "AUDIO_BADGE")]
MusicFilled,
}
// CONTINUATION
#[serde_as]
@ -343,6 +404,17 @@ impl From<Thumbnails> for Vec<crate::model::Thumbnail> {
}
}
impl ContentImage {
pub(crate) fn into_image(self) -> ImageViewOl {
match self {
ContentImage::ThumbnailViewModel(image) => image,
ContentImage::CollectionThumbnailViewModel { primary_thumbnail } => {
primary_thumbnail.thumbnail_view_model
}
}
}
}
impl From<Vec<ChannelBadge>> for crate::model::Verification {
fn from(badges: Vec<ChannelBadge>) -> Self {
badges
@ -366,6 +438,25 @@ impl From<Icon> for crate::model::Verification {
}
}
impl From<AttachmentRun> for crate::model::Verification {
    /// Derive the verification status from an attachment run.
    ///
    /// Only the first image source is inspected: a check-circle icon maps to
    /// `Verified`, a music icon maps to `Artist`, and a run without any
    /// image source maps to `None`.
    fn from(value: AttachmentRun) -> Self {
        let first_source = value
            .element
            .typ
            .image_type
            .image
            .sources
            .into_iter()
            .next();

        first_source.map_or(Self::None, |source| {
            match source.client_resource.image_name {
                IconName::CheckCircleFilled => Self::Verified,
                IconName::MusicFilled => Self::Artist,
            }
        })
    }
}
pub(crate) fn alerts_to_err(id: &str, alerts: Option<Vec<Alert>>) -> ExtractionError {
ExtractionError::NotFound {
id: id.to_owned(),
@ -480,9 +571,11 @@ pub(crate) struct PhMetadataView {
pub content_metadata_view_model: PhMetadataView2,
}
/// Inner content metadata view model holding the metadata rows.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhMetadataView2 {
/// Metadata rows (JSON key `metadataRows`). Rows that fail to deserialize
/// are skipped by `VecSkipError` instead of failing the whole list.
#[serde_as(as = "VecSkipError<_>")]
pub metadata_rows: Vec<PhMetadataRow>,
}
@ -498,17 +591,26 @@ pub(crate) struct PhMetadataRow {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) enum MetadataPart {
Text(#[serde_as(deserialize_as = "AttributedText")] String),
Text(#[serde_as(deserialize_as = "AttributedText")] TextComponent),
#[serde(rename_all = "camelCase")]
AvatarStack {
avatar_stack_view_model: AvatarStackViewModel,
avatar_stack_view_model: TextComponentBox,
},
}
impl MetadataPart {
pub fn into_text_component(self) -> TextComponent {
match self {
MetadataPart::Text(text_component) => text_component,
MetadataPart::AvatarStack {
avatar_stack_view_model,
} => avatar_stack_view_model.text,
}
}
pub fn as_str(&self) -> &str {
match self {
MetadataPart::Text(s) => s,
MetadataPart::Text(s) => s.as_str(),
MetadataPart::AvatarStack {
avatar_stack_view_model,
} => avatar_stack_view_model.text.as_str(),
@ -516,24 +618,14 @@ impl MetadataPart {
}
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AvatarStackViewModel {
#[serde_as(deserialize_as = "AttributedText")]
pub text: TextComponent,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ContentImage {
pub collection_thumbnail_view_model: CollectionThumbnailViewModel,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct CollectionThumbnailViewModel {
pub primary_thumbnail: ThumbnailViewModelWrap,
pub(crate) enum ContentImage {
ThumbnailViewModel(ImageViewOl),
#[serde(rename_all = "camelCase")]
CollectionThumbnailViewModel {
primary_thumbnail: ThumbnailViewModelWrap,
},
}
#[derive(Debug, Deserialize)]

View file

@ -4,12 +4,9 @@ use serde_with::{
};
use time::OffsetDateTime;
use super::{ChannelBadge, ContentImage, ContinuationEndpoint, Thumbnails};
use super::{ChannelBadge, ContentImage, ContinuationEndpoint, PhMetadataView, Thumbnails};
use crate::{
model::{
Channel, ChannelId, ChannelItem, ChannelTag, PlaylistItem, Verification, VideoItem,
YouTubeItem,
},
model::{Channel, ChannelItem, ChannelTag, PlaylistItem, VideoItem, YouTubeItem},
param::Language,
serializer::{
text::{AttributedText, Text, TextComponent},
@ -167,23 +164,25 @@ pub(crate) struct ShortsOverlayMetadata {
pub secondary_text: Option<String>,
}
/// Generalized list item, currently only used for playlists
/// Generalized list item, currently only used for channel playlists and YTM items
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct LockupViewModel {
pub content_image: ContentImage,
pub metadata: LockupViewModelMetadata,
pub content_id: String,
#[serde(default)]
#[serde_as(deserialize_as = "DefaultOnError")]
pub content_type: LockupContentType,
pub content_image: ContentImage,
pub metadata: LockupViewModelMetadata,
}
/// Content type of a `LockupViewModel`.
///
/// Deserialized from SCREAMING_SNAKE_CASE strings, so the variant names carry
/// the full `LockupContentType` prefix to match the serialized values.
#[derive(Default, Debug, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
// The shared variant prefix is required by the serialized names (see above)
#[allow(clippy::enum_variant_names)]
pub(crate) enum LockupContentType {
/// `LOCKUP_CONTENT_TYPE_PLAYLIST`: mapped to a playlist item by `map_lockup`
LockupContentTypePlaylist,
/// `LOCKUP_CONTENT_TYPE_VIDEO`: mapped to a video item by `map_lockup`
LockupContentTypeVideo,
/// Default value; `map_lockup` returns `None` for lockups of this type
#[default]
Unknown,
}
@ -200,6 +199,7 @@ pub(crate) struct LockupViewModelMetadata {
pub(crate) struct LockupViewModelMetadataInner {
#[serde_as(as = "AttributedText")]
pub title: String,
pub metadata: PhMetadataView,
}
/// Video displayed in a playlist
@ -509,19 +509,18 @@ impl<T> YouTubeListMapper<T> {
thumbnail: video.thumbnail.into(),
channel: video
.channel
.and_then(|c| {
ChannelId::try_from(c).ok().map(|c| ChannelTag {
id: c.id,
name: c.name,
avatar: video
.channel_thumbnail_supported_renderers
.map(|tn| tn.channel_thumbnail_with_link_renderer.thumbnail)
.or(video.channel_thumbnail)
.unwrap_or_default()
.into(),
verification: video.owner_badges.into(),
subscriber_count: None,
})
.and_then(|c| ChannelTag::try_from(c).ok())
.map(|mut c| {
c.avatar = video
.channel_thumbnail_supported_renderers
.map(|tn| tn.channel_thumbnail_with_link_renderer.thumbnail)
.or(video.channel_thumbnail)
.unwrap_or_default()
.into();
if !c.verification.verified() {
c.verification = video.owner_badges.into();
}
c
})
.or_else(|| self.channel.clone()),
publish_date: video
@ -603,16 +602,7 @@ impl<T> YouTubeListMapper<T> {
}
fn map_playlist_video(&mut self, video: PlaylistVideoRenderer) -> VideoItem {
let channel = ChannelId::try_from(video.channel)
.ok()
.map(|ch| ChannelTag {
id: ch.id,
name: ch.name,
avatar: Vec::new(),
verification: Verification::None,
subscriber_count: None,
});
let channel = ChannelTag::try_from(video.channel).ok();
let mut video_info = video.video_info.into_iter();
let video_info1 = video_info
.next()
@ -675,14 +665,12 @@ impl<T> YouTubeListMapper<T> {
.into(),
channel: playlist
.channel
.and_then(|c| {
ChannelId::try_from(c).ok().map(|c| ChannelTag {
id: c.id,
name: c.name,
avatar: Vec::new(),
verification: playlist.owner_badges.into(),
subscriber_count: None,
})
.and_then(|c| ChannelTag::try_from(c).ok())
.map(|mut c| {
if !c.verification.verified() {
c.verification = playlist.owner_badges.into();
}
c
})
.or_else(|| self.channel.clone()),
video_count: playlist.video_count.or_else(|| {
@ -719,34 +707,88 @@ impl<T> YouTubeListMapper<T> {
}
}
fn map_lockup(&mut self, lockup: LockupViewModel) -> Option<PlaylistItem> {
fn map_lockup(&mut self, lockup: LockupViewModel) -> Option<YouTubeItem> {
let md = lockup.metadata.lockup_metadata_view_model;
let tn = lockup
.content_image
.collection_thumbnail_view_model
.primary_thumbnail
.thumbnail_view_model;
let tn = lockup.content_image.into_image();
match lockup.content_type {
LockupContentType::LockupContentTypePlaylist => Some(PlaylistItem {
id: lockup.content_id,
name: md.title,
thumbnail: tn.image.into(),
channel: self.channel.clone(),
video_count: tn
.overlays
.first()
.and_then(|ol| {
ol.thumbnail_overlay_badge_view_model
.thumbnail_badges
.first()
})
.and_then(|badge| {
util::parse_numeric_or_warn(
&badge.thumbnail_badge_view_model.text,
&mut self.warnings,
LockupContentType::LockupContentTypePlaylist => {
Some(YouTubeItem::Playlist(PlaylistItem {
id: lockup.content_id,
name: md.title,
thumbnail: tn.image.into(),
channel: self.channel.clone(),
video_count: tn
.overlays
.first()
.and_then(|ol| {
ol.thumbnail_overlay_badge_view_model
.thumbnail_badges
.first()
})
.and_then(|badge| {
util::parse_numeric_or_warn(
&badge.thumbnail_badge_view_model.text,
&mut self.warnings,
)
}),
}))
}
LockupContentType::LockupContentTypeVideo => {
let mut mdr = md
.metadata
.content_metadata_view_model
.metadata_rows
.into_iter();
let channel = mdr
.next()
.and_then(|r| r.metadata_parts.into_iter().next())
.and_then(|p| ChannelTag::try_from(p.into_text_component()).ok());
let (view_count, publish_date_txt) = mdr
.next()
.map(|metadata_row| {
let mut parts = metadata_row.metadata_parts.into_iter();
let p1 = parts.next();
let p2 = parts.next();
(
p1.and_then(|p| {
util::parse_large_numstr_or_warn(
p.as_str(),
self.lang,
&mut self.warnings,
)
}),
p2.map(|p2| p2.into_text_component().into_string()),
)
})
.unwrap_or_default();
Some(YouTubeItem::Video(VideoItem {
id: lockup.content_id,
name: md.title,
duration: tn
.overlays
.first()
.and_then(|ol| {
ol.thumbnail_overlay_badge_view_model
.thumbnail_badges
.first()
})
.and_then(|badge| {
util::parse_video_length(&badge.thumbnail_badge_view_model.text)
}),
thumbnail: tn.image.into(),
channel,
publish_date: publish_date_txt.as_deref().and_then(|t| {
timeago::parse_textual_date_or_warn(self.lang, t, &mut self.warnings)
}),
}),
publish_date_txt,
view_count,
is_live: false,
is_short: false,
is_upcoming: false,
short_description: None,
}))
}
LockupContentType::Unknown => None,
}
}
@ -782,7 +824,7 @@ impl YouTubeListMapper<YouTubeItem> {
}
YouTubeListItem::LockupViewModel(lockup) => {
if let Some(mapped) = self.map_lockup(lockup) {
self.items.push(YouTubeItem::Playlist(mapped));
self.items.push(mapped);
}
}
YouTubeListItem::ContinuationItemRenderer {
@ -828,6 +870,11 @@ impl YouTubeListMapper<VideoItem> {
let mapped = self.map_playlist_video(video);
self.items.push(mapped);
}
YouTubeListItem::LockupViewModel(lockup) => {
if let Some(YouTubeItem::Video(mapped)) = self.map_lockup(lockup) {
self.items.push(mapped);
}
}
YouTubeListItem::ContinuationItemRenderer {
continuation_endpoint,
} => self.ctoken = Some(continuation_endpoint.continuation_command.token),
@ -859,7 +906,7 @@ impl YouTubeListMapper<PlaylistItem> {
self.items.push(mapped);
}
YouTubeListItem::LockupViewModel(lockup) => {
if let Some(mapped) = self.map_lockup(lockup) {
if let Some(YouTubeItem::Playlist(mapped)) = self.map_lockup(lockup) {
self.items.push(mapped);
}
}

View file

@ -252,6 +252,7 @@ impl MapResponse<VideoDetails> for response::VideoDetails {
text,
page_type,
browse_id,
..
} => match page_type {
response::url_endpoint::PageType::Channel => (browse_id, text),
_ => {