use once_cell::sync::Lazy; use regex::Regex; use serde::Deserialize; use serde_with::{ json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError, }; use time::OffsetDateTime; use super::{ChannelBadge, ContinuationEndpoint, Thumbnails}; use crate::{ model::{ Channel, ChannelId, ChannelItem, ChannelTag, PlaylistItem, Verification, VideoItem, YouTubeItem, }, param::Language, serializer::{ text::{AccessibilityText, Text, TextComponent}, MapResult, }, util::{self, timeago, TryRemove}, }; #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) enum YouTubeListItem { #[serde(alias = "gridVideoRenderer", alias = "compactVideoRenderer")] VideoRenderer(VideoRenderer), ReelItemRenderer(ReelItemRenderer), PlaylistVideoRenderer(PlaylistVideoRenderer), #[serde(alias = "gridPlaylistRenderer")] PlaylistRenderer(PlaylistRenderer), ChannelRenderer(ChannelRenderer), /// Continauation items are located at the end of a list /// and contain the continuation token for progressive loading #[serde(rename_all = "camelCase")] ContinuationItemRenderer { continuation_endpoint: ContinuationEndpoint, }, /// Corrected search query #[serde(rename_all = "camelCase")] ShowingResultsForRenderer { #[serde_as(as = "Text")] corrected_query: String, }, /// Contains video on startpage /// /// Seems to be currently A/B tested on the channel page, /// as of 11.10.2022 #[serde(alias = "shelfRenderer")] RichItemRenderer { content: Box, }, /// Contains search results /// /// Seems to be currently A/B tested on the video details page, /// as of 11.10.2022 /// /// GridRenderer: contains videos on channel page #[serde(alias = "expandedShelfContentsRenderer", alias = "gridRenderer")] ItemSectionRenderer { #[serde(alias = "items")] contents: MapResult>, }, /// No video list item (e.g. ad) or unimplemented item /// /// Unimplemented: /// - compactPlaylistRenderer (recommended playlists) /// - compactRadioRenderer (recommended mix) #[serde(other, deserialize_with = "deserialize_ignore_any")] None, } #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct VideoRenderer { pub video_id: String, pub thumbnail: Thumbnails, #[serde_as(as = "Text")] pub title: String, #[serde(rename = "shortBylineText")] pub channel: Option, pub channel_thumbnail: Option, pub channel_thumbnail_supported_renderers: Option, #[serde_as(as = "Option")] pub published_time_text: Option, #[serde_as(as = "Option")] pub length_text: Option, /// Contains `No views` if the view count is zero #[serde_as(as = "Option")] pub view_count_text: Option, /// Channel verification badge #[serde(default)] #[serde_as(as = "VecSkipError<_>")] pub owner_badges: Vec, /// Contains live tag for recommended videos #[serde(default)] #[serde_as(as = "VecSkipError<_>")] pub badges: Vec, /// Contains Short/Live tag #[serde(default)] #[serde_as(as = "VecSkipError<_>")] pub thumbnail_overlays: Vec, /// Abbreviated video description (on startpage) #[serde_as(as = "Option")] pub description_snippet: Option, /// Contains abbreviated video description (on search page) #[serde_as(as = "Option>")] pub detailed_metadata_snippets: Option>, /// Release date for upcoming videos pub upcoming_event_data: Option, } /// Short video item #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ReelItemRenderer { pub video_id: String, pub thumbnail: Thumbnails, #[serde_as(as = "Text")] pub headline: String, /// Contains `No views` if the view count is zero #[serde_as(as = "Option")] pub view_count_text: Option, /// video duration /// /// Example: `the horror maze - 44 seconds - play video` /// /// Dashes may be `\u2013` (emdash) #[serde_as(as = "Option")] pub accessibility: Option, #[serde(default)] #[serde_as(as = "DefaultOnError")] pub navigation_endpoint: Option, } /// Video displayed in a playlist #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct PlaylistVideoRenderer { pub video_id: String, pub thumbnail: Thumbnails, #[serde_as(as = "Text")] pub title: String, #[serde(rename = "shortBylineText")] pub channel: TextComponent, #[serde_as(as = "Option")] pub length_seconds: Option, /// Regular video: `["29K views", " • ", "13 years ago"]` /// Livestream: `["66K", " watching"]` /// Upcoming: `["8", " waiting"]` #[serde(default)] #[serde_as(as = "DefaultOnError")] pub video_info: Vec, /// Contains Short/Live tag #[serde(default)] #[serde_as(as = "VecSkipError<_>")] pub thumbnail_overlays: Vec, /// Release date for upcoming videos pub upcoming_event_data: Option, } /// Playlist displayed in search results #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct PlaylistRenderer { pub playlist_id: String, #[serde_as(as = "Text")] pub title: String, pub thumbnail: Option, /// Used by playlists from search page /// /// The first item of this list contains the playlist thumbnail, /// subsequent items contain very small thumbnails of the next playlist videos pub thumbnails: Option>, #[serde_as(as = "Option")] pub video_count: Option, #[serde_as(as = "Option")] pub video_count_short_text: Option, #[serde(rename = "shortBylineText")] pub channel: Option, /// Channel verification badge #[serde(default)] #[serde_as(as = "VecSkipError<_>")] pub owner_badges: Vec, } /// Channel displayed in search results #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ChannelRenderer { pub channel_id: String, #[serde_as(as = "Text")] pub title: String, pub thumbnail: Thumbnails, /// Abbreviated channel description /// /// Not present if the channel has no description #[serde(default)] #[serde_as(as = "Text")] pub description_snippet: String, /// Not present if the channel has no videos #[serde_as(as = "Option")] pub video_count_text: Option, #[serde_as(as = "Option")] pub subscriber_count_text: Option, /// Channel verification badge #[serde(default)] #[serde_as(as = "VecSkipError<_>")] pub owner_badges: Vec, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct YouTubeListRendererWrap { #[serde(alias = "richGridRenderer")] pub section_list_renderer: YouTubeListRenderer, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct YouTubeListRenderer { pub contents: MapResult>, } #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct UpcomingEventData { /// Unixtime in seconds #[serde_as(as = "JsonString")] pub start_time: i64, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct TimeOverlay { pub thumbnail_overlay_time_status_renderer: TimeOverlayRenderer, } /// Badges are displayed on the video thumbnail and /// show certain video properties (e.g. active livestream) #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct VideoBadge { pub metadata_badge_renderer: VideoBadgeRenderer, } /// Badges are displayed on the video thumbnail and /// show certain video properties (e.g. active livestream) #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct VideoBadgeRenderer { pub style: VideoBadgeStyle, } #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Eq, Hash)] #[serde(rename_all = "SCREAMING_SNAKE_CASE")] pub(crate) enum VideoBadgeStyle { /// Active livestream BadgeStyleTypeLiveNow, } #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct TimeOverlayRenderer { /// `29:54` /// /// Is `LIVE` in case of a livestream and `SHORTS` in case of a short video #[serde_as(as = "Text")] pub text: String, #[serde(default)] #[serde_as(deserialize_as = "DefaultOnError")] pub style: TimeOverlayStyle, } #[derive(Default, Clone, Copy, Debug, Deserialize, PartialEq, Eq)] #[serde(rename_all = "SCREAMING_SNAKE_CASE")] pub(crate) enum TimeOverlayStyle { #[default] Default, Live, Shorts, } #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct DetailedMetadataSnippet { #[serde_as(as = "Text")] pub snippet_text: String, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ChannelThumbnailSupportedRenderers { pub channel_thumbnail_with_link_renderer: ChannelThumbnailWithLinkRenderer, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ChannelThumbnailWithLinkRenderer { pub thumbnail: Thumbnails, } /// Short video item navigation endpoint (contains upload date) #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ReelNavigationEndpoint { pub reel_watch_endpoint: ReelWatchEndpoint, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ReelWatchEndpoint { pub overlay: ReelPlayerOverlay, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ReelPlayerOverlay { pub reel_player_overlay_renderer: ReelPlayerOverlayRenderer, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ReelPlayerOverlayRenderer { pub reel_player_header_supported_renderers: ReelPlayerHeaderRenderers, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ReelPlayerHeaderRenderers { pub reel_player_header_renderer: ReelPlayerHeaderRenderer, } #[serde_as] #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ReelPlayerHeaderRenderer { #[serde_as(as = "Text")] pub timestamp_text: String, } trait IsLive { fn is_live(&self) -> bool; } trait IsShort { fn is_short(&self) -> bool; } impl IsLive for Vec { fn is_live(&self) -> bool { self.iter().any(|badge| { badge.metadata_badge_renderer.style == VideoBadgeStyle::BadgeStyleTypeLiveNow }) } } impl IsLive for Vec { fn is_live(&self) -> bool { self.iter().any(|overlay| { overlay.thumbnail_overlay_time_status_renderer.style == TimeOverlayStyle::Live }) } } impl IsShort for Vec { fn is_short(&self) -> bool { self.iter().any(|overlay| { overlay.thumbnail_overlay_time_status_renderer.style == TimeOverlayStyle::Shorts }) } } static ACCESSIBILITY_SEP_REGEX: Lazy = Lazy::new(|| { Regex::new("(?:[ \u{00a0}][-\u{2013}\u{2014}] )|\u{2013}|(?:\u{055d} )|(?:\", )").unwrap() }); /// Result of mapping a list of different YouTube enities /// (videos, channels, playlists) #[derive(Debug)] pub(crate) struct YouTubeListMapper { lang: Language, channel: Option, pub items: Vec, pub warnings: Vec, pub ctoken: Option, pub corrected_query: Option, } impl YouTubeListMapper { pub fn new(lang: Language) -> Self { Self { lang, channel: None, items: Vec::new(), warnings: Vec::new(), ctoken: None, corrected_query: None, } } pub fn with_channel(lang: Language, channel: &Channel, warnings: Vec) -> Self { Self { lang, channel: Some(ChannelTag { id: channel.id.clone(), name: channel.name.clone(), avatar: Vec::new(), verification: channel.verification, subscriber_count: channel.subscriber_count, }), items: Vec::new(), warnings, ctoken: None, corrected_query: None, } } fn map_video(&mut self, video: VideoRenderer) -> VideoItem { let is_live = video.thumbnail_overlays.is_live() || video.badges.is_live(); let is_short = video.thumbnail_overlays.is_short(); let length_text = video.length_text.or_else(|| { video .thumbnail_overlays .into_iter() .find(|ol| { ol.thumbnail_overlay_time_status_renderer.style == TimeOverlayStyle::Default }) .map(|ol| ol.thumbnail_overlay_time_status_renderer.text) }); VideoItem { id: video.video_id, name: video.title, length: length_text.and_then(|txt| util::parse_video_length(&txt)), thumbnail: video.thumbnail.into(), channel: video .channel .and_then(|c| { ChannelId::try_from(c).ok().map(|c| ChannelTag { id: c.id, name: c.name, avatar: video .channel_thumbnail_supported_renderers .map(|tn| tn.channel_thumbnail_with_link_renderer.thumbnail) .or(video.channel_thumbnail) .unwrap_or_default() .into(), verification: video.owner_badges.into(), subscriber_count: None, }) }) .or_else(|| self.channel.clone()), publish_date: video .upcoming_event_data .as_ref() .and_then(|upc| OffsetDateTime::from_unix_timestamp(upc.start_time).ok()) .or_else(|| { video.published_time_text.as_ref().and_then(|txt| { timeago::parse_timeago_dt_or_warn(self.lang, txt, &mut self.warnings) }) }), publish_date_txt: video.published_time_text, view_count: video .view_count_text .map(|txt| util::parse_numeric(&txt).unwrap_or_default()), is_live, is_short, is_upcoming: video.upcoming_event_data.is_some(), short_description: video .detailed_metadata_snippets .and_then(|snippets| snippets.into_iter().next().map(|s| s.snippet_text)) .or(video.description_snippet), } } fn map_short_video(&mut self, video: ReelItemRenderer) -> VideoItem { let pub_date_txt = video.navigation_endpoint.map(|n| { n.reel_watch_endpoint .overlay .reel_player_overlay_renderer .reel_player_header_supported_renderers .reel_player_header_renderer .timestamp_text }); let length = video.accessibility.and_then(|acc| { // The video title has to be stripped from the beginning because in Swahili // the duration follows the title with no separator (probably a bug). // Example: `what I do with leftoversdakika 1 - cheza video` let parts = ACCESSIBILITY_SEP_REGEX .split(acc.trim_start_matches(&video.headline)) .collect::>(); if parts.len() > 1 { // In Russian, the duration is the last part // Example: `Воспроизвести видео – \"hangover food\". Его продолжительность – 58 секунд.` let i = match self.lang { Language::Ru => 1, _ => 2, }; timeago::parse_video_duration_or_warn( self.lang, parts[parts.len() - i], &mut self.warnings, ) } else { self.warnings .push(format!("could not split video duration `{acc}`")); None } }); VideoItem { id: video.video_id, name: video.headline, length, thumbnail: video.thumbnail.into(), channel: self.channel.clone(), publish_date: pub_date_txt.as_ref().and_then(|txt| { timeago::parse_timeago_dt_or_warn(self.lang, txt, &mut self.warnings) }), publish_date_txt: pub_date_txt, view_count: video.view_count_text.and_then(|txt| { util::parse_large_numstr_or_warn(&txt, self.lang, &mut self.warnings) }), is_live: false, is_short: true, is_upcoming: false, short_description: None, } } fn map_playlist_video(&mut self, video: PlaylistVideoRenderer) -> VideoItem { let channel = ChannelId::try_from(video.channel) .ok() .map(|ch| ChannelTag { id: ch.id, name: ch.name, avatar: Vec::new(), verification: Verification::None, subscriber_count: None, }); let mut video_info = video.video_info.into_iter(); let video_info1 = video_info .next() .map(|s| match video_info.next().as_deref() { None | Some(util::DOT_SEPARATOR) => s, Some(s2) => s + s2, }); let video_info2 = video_info.next(); // RU: "7 лет назад" " • " "210 млн просмотров" (order flipped) let (view_count_txt, publish_date_txt) = if self.lang == Language::Ru && video_info2.is_some() { (video_info2, video_info1) } else { (video_info1, video_info2) }; let is_live = video.thumbnail_overlays.is_live(); let publish_date = video .upcoming_event_data .as_ref() .and_then(|upc| OffsetDateTime::from_unix_timestamp(upc.start_time).ok()) .or_else(|| { if is_live { None } else { publish_date_txt.as_ref().and_then(|txt| { timeago::parse_timeago_dt_or_warn(self.lang, txt, &mut self.warnings) }) } }); VideoItem { id: video.video_id, name: video.title, length: video.length_seconds, thumbnail: video.thumbnail.into(), channel, publish_date, publish_date_txt, view_count: view_count_txt.and_then(|txt| { util::parse_large_numstr_or_warn(&txt, self.lang, &mut self.warnings) }), is_live, is_short: video.thumbnail_overlays.is_short(), is_upcoming: video.upcoming_event_data.is_some(), short_description: None, } } fn map_playlist(&self, playlist: PlaylistRenderer) -> PlaylistItem { PlaylistItem { id: playlist.playlist_id, name: playlist.title, thumbnail: playlist .thumbnail .or_else(|| playlist.thumbnails.and_then(|mut t| t.try_swap_remove(0))) .unwrap_or_default() .into(), channel: playlist .channel .and_then(|c| { ChannelId::try_from(c).ok().map(|c| ChannelTag { id: c.id, name: c.name, avatar: Vec::new(), verification: playlist.owner_badges.into(), subscriber_count: None, }) }) .or_else(|| self.channel.clone()), video_count: playlist.video_count.or_else(|| { playlist .video_count_short_text .and_then(|txt| util::parse_numeric(&txt).ok()) }), } } fn map_channel(&mut self, channel: ChannelRenderer) -> ChannelItem { // channel handle instead of subscriber count (A/B test 3) let (sc_txt, vc_text) = if channel .subscriber_count_text .as_ref() .map(|txt| txt.starts_with('@')) .unwrap_or_default() { (channel.video_count_text, None) } else { (channel.subscriber_count_text, channel.video_count_text) }; ChannelItem { id: channel.channel_id, name: channel.title, avatar: channel.thumbnail.into(), verification: channel.owner_badges.into(), subscriber_count: sc_txt.and_then(|txt| { util::parse_large_numstr_or_warn(&txt, self.lang, &mut self.warnings) }), video_count: vc_text.and_then(|txt| { util::parse_large_numstr_or_warn(&txt, self.lang, &mut self.warnings) }), short_description: channel.description_snippet, } } } impl YouTubeListMapper { fn map_item(&mut self, item: YouTubeListItem) { match item { YouTubeListItem::VideoRenderer(video) => { let mapped = YouTubeItem::Video(self.map_video(video)); self.items.push(mapped); } YouTubeListItem::ReelItemRenderer(video) => { let mapped = self.map_short_video(video); self.items.push(YouTubeItem::Video(mapped)); } YouTubeListItem::PlaylistVideoRenderer(video) => { let mapped = self.map_playlist_video(video); self.items.push(YouTubeItem::Video(mapped)); } YouTubeListItem::PlaylistRenderer(playlist) => { let mapped = YouTubeItem::Playlist(self.map_playlist(playlist)); self.items.push(mapped); } YouTubeListItem::ChannelRenderer(channel) => { let mapped = YouTubeItem::Channel(self.map_channel(channel)); self.items.push(mapped); } YouTubeListItem::ContinuationItemRenderer { continuation_endpoint, } => self.ctoken = Some(continuation_endpoint.continuation_command.token), YouTubeListItem::ShowingResultsForRenderer { corrected_query } => { self.corrected_query = Some(corrected_query); } YouTubeListItem::RichItemRenderer { content } => { self.map_item(*content); } YouTubeListItem::ItemSectionRenderer { mut contents } => { self.warnings.append(&mut contents.warnings); contents.c.into_iter().for_each(|it| self.map_item(it)); } YouTubeListItem::None => {} } } pub(crate) fn map_response(&mut self, mut res: MapResult>) { self.warnings.append(&mut res.warnings); res.c.into_iter().for_each(|item| self.map_item(item)); } } impl YouTubeListMapper { fn map_item(&mut self, item: YouTubeListItem) { match item { YouTubeListItem::VideoRenderer(video) => { let mapped = self.map_video(video); self.items.push(mapped); } YouTubeListItem::ReelItemRenderer(video) => { let mapped = self.map_short_video(video); self.items.push(mapped); } YouTubeListItem::PlaylistVideoRenderer(video) => { let mapped = self.map_playlist_video(video); self.items.push(mapped); } YouTubeListItem::ContinuationItemRenderer { continuation_endpoint, } => self.ctoken = Some(continuation_endpoint.continuation_command.token), YouTubeListItem::ShowingResultsForRenderer { corrected_query } => { self.corrected_query = Some(corrected_query); } YouTubeListItem::RichItemRenderer { content } => { self.map_item(*content); } YouTubeListItem::ItemSectionRenderer { mut contents } => { self.warnings.append(&mut contents.warnings); contents.c.into_iter().for_each(|it| self.map_item(it)); } _ => {} } } pub(crate) fn map_response(&mut self, mut res: MapResult>) { self.warnings.append(&mut res.warnings); res.c.into_iter().for_each(|item| self.map_item(item)); } } impl YouTubeListMapper { fn map_item(&mut self, item: YouTubeListItem) { match item { YouTubeListItem::PlaylistRenderer(playlist) => { let mapped = self.map_playlist(playlist); self.items.push(mapped); } YouTubeListItem::ContinuationItemRenderer { continuation_endpoint, } => self.ctoken = Some(continuation_endpoint.continuation_command.token), YouTubeListItem::ShowingResultsForRenderer { corrected_query } => { self.corrected_query = Some(corrected_query); } YouTubeListItem::RichItemRenderer { content } => { self.map_item(*content); } YouTubeListItem::ItemSectionRenderer { mut contents } => { self.warnings.append(&mut contents.warnings); contents.c.into_iter().for_each(|it| self.map_item(it)); } _ => {} } } pub(crate) fn map_response(&mut self, mut res: MapResult>) { self.warnings.append(&mut res.warnings); res.c.into_iter().for_each(|item| self.map_item(item)); } } #[cfg(test)] mod tests { use super::ACCESSIBILITY_SEP_REGEX; use rstest::rstest; #[rstest] #[case::af( "BTS - Permission to Dance Cover #shorts #pinkfong – 50 sekondes – speel video", "50 sekondes" )] #[case::de( "Point of view: Me VS My mom #shorts – 8 Sekunden – Video wiedergeben", "8 Sekunden" )] #[case::be( "Point of view: Me VS My mom #shorts–8 секунд – прайграць відэа", "8 секунд" )] #[case::fil("do u wanna get swole? - 53 segundo - i-play ang video", "53 segundo")] #[case::ar( "«the holy trinity of korean street food»՝ 1 րոպե՝ նվագարկել տեսանյութը", "1 րոպե" )] #[case::lv( "what i ate in google japan — 1 minūte — atskaņot videoklipu", "1 minūte" )] #[case::sq("When you impulse buy... - 1 minutë - luaj videon", "1 minutë")] #[case::uk( "\"Point of view: Me VS My mom #shorts\", 8 секунд – відтворити відео", "8 секунд" )] // INFO: sw is unparseable "coming soonsekunde 58 - cheza video" fn split_duration_txt(#[case] s: &str, #[case] expect: &str) { let parts = ACCESSIBILITY_SEP_REGEX.split(s).collect::>(); assert_eq!(parts[parts.len() - 2], expect); } #[test] fn split_duration_txt_ru() { let s = "Воспроизвести видео – \"the holy trinity of korean street food\". Его продолжительность – 1 минута."; let parts = ACCESSIBILITY_SEP_REGEX.split(s).collect::>(); assert_eq!(parts[parts.len() - 1], "1 минута."); } }