Merge branch 'intl-tests'

This commit is contained in:
ThetaDev 2023-05-07 15:06:27 +02:00
commit b3331b36a7
69 changed files with 55604 additions and 32189 deletions

View file

@ -179,8 +179,11 @@ impl MapResponse<Channel<Paginator<VideoItem>>> for response::Channel {
lang,
)?;
let mut mapper =
response::YouTubeListMapper::<VideoItem>::with_channel(lang, &channel_data);
let mut mapper = response::YouTubeListMapper::<VideoItem>::with_channel(
lang,
&channel_data.c,
channel_data.warnings,
);
mapper.map_response(content.content);
let p = Paginator::new_ext(
None,
@ -191,7 +194,7 @@ impl MapResponse<Channel<Paginator<VideoItem>>> for response::Channel {
);
Ok(MapResult {
c: combine_channel_data(channel_data, p),
c: combine_channel_data(channel_data.c, p),
warnings: mapper.warnings,
})
}
@ -219,13 +222,16 @@ impl MapResponse<Channel<Paginator<PlaylistItem>>> for response::Channel {
lang,
)?;
let mut mapper =
response::YouTubeListMapper::<PlaylistItem>::with_channel(lang, &channel_data);
let mut mapper = response::YouTubeListMapper::<PlaylistItem>::with_channel(
lang,
&channel_data.c,
channel_data.warnings,
);
mapper.map_response(content.content);
let p = Paginator::new(None, mapper.items, mapper.ctoken);
Ok(MapResult {
c: combine_channel_data(channel_data, p),
c: combine_channel_data(channel_data.c, p),
warnings: mapper.warnings,
})
}
@ -266,7 +272,7 @@ impl MapResponse<Channel<ChannelInfo>> for response::Channel {
});
Ok(MapResult {
c: combine_channel_data(channel_data, cinfo),
c: combine_channel_data(channel_data.c, cinfo),
warnings,
})
}
@ -297,7 +303,7 @@ fn map_channel(
d: MapChannelData,
id: &str,
lang: Language,
) -> Result<Channel<()>, ExtractionError> {
) -> Result<MapResult<Channel<()>>, ExtractionError> {
let header = d
.header
.ok_or(ExtractionError::ContentUnavailable(Cow::Borrowed(
@ -326,33 +332,35 @@ fn map_channel(
.vanity_channel_url
.as_ref()
.and_then(|url| map_vanity_url(url, id));
let mut warnings = Vec::new();
Ok(match header {
response::channel::Header::C4TabbedHeaderRenderer(header) => Channel {
id: metadata.external_id,
name: metadata.title,
subscriber_count: header
.subscriber_count_text
.and_then(|txt| util::parse_large_numstr(&txt, lang)),
avatar: header.avatar.into(),
verification: header.badges.into(),
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
vanity_url,
banner: header.banner.into(),
mobile_banner: header.mobile_banner.into(),
tv_banner: header.tv_banner.into(),
has_shorts: d.has_shorts,
has_live: d.has_live,
visitor_data: d.visitor_data,
content: (),
},
response::channel::Header::CarouselHeaderRenderer(carousel) => {
let hdata = carousel
.contents
.into_iter()
.filter_map(|item| {
match item {
Ok(MapResult {
c: match header {
response::channel::Header::C4TabbedHeaderRenderer(header) => Channel {
id: metadata.external_id,
name: metadata.title,
subscriber_count: header
.subscriber_count_text
.and_then(|txt| util::parse_large_numstr_or_warn(&txt, lang, &mut warnings)),
avatar: header.avatar.into(),
verification: header.badges.into(),
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
vanity_url,
banner: header.banner.into(),
mobile_banner: header.mobile_banner.into(),
tv_banner: header.tv_banner.into(),
has_shorts: d.has_shorts,
has_live: d.has_live,
visitor_data: d.visitor_data,
content: (),
},
response::channel::Header::CarouselHeaderRenderer(carousel) => {
let hdata = carousel
.contents
.into_iter()
.filter_map(|item| {
match item {
response::channel::CarouselHeaderRendererItem::TopicChannelDetailsRenderer {
subscriber_count_text,
subtitle,
@ -360,32 +368,33 @@ fn map_channel(
} => Some((subscriber_count_text.or(subtitle), avatar)),
response::channel::CarouselHeaderRendererItem::None => None,
}
})
.next();
})
.next();
Channel {
id: metadata.external_id,
name: metadata.title,
subscriber_count: hdata.as_ref().and_then(|hdata| {
hdata
.0
.as_ref()
.and_then(|txt| util::parse_large_numstr(txt, lang))
}),
avatar: hdata.map(|hdata| hdata.1.into()).unwrap_or_default(),
verification: crate::model::Verification::Verified,
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
vanity_url,
banner: Vec::new(),
mobile_banner: Vec::new(),
tv_banner: Vec::new(),
has_shorts: d.has_shorts,
has_live: d.has_live,
visitor_data: d.visitor_data,
content: (),
Channel {
id: metadata.external_id,
name: metadata.title,
subscriber_count: hdata.as_ref().and_then(|hdata| {
hdata.0.as_ref().and_then(|txt| {
util::parse_large_numstr_or_warn(txt, lang, &mut warnings)
})
}),
avatar: hdata.map(|hdata| hdata.1.into()).unwrap_or_default(),
verification: crate::model::Verification::Verified,
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
vanity_url,
banner: Vec::new(),
mobile_banner: Vec::new(),
tv_banner: Vec::new(),
has_shorts: d.has_shorts,
has_live: d.has_live,
visitor_data: d.visitor_data,
content: (),
}
}
}
},
warnings,
})
}
@ -401,7 +410,7 @@ fn map_channel_content(
) -> Result<MappedChannelContent, ExtractionError> {
match contents {
Some(contents) => {
let tabs = contents.two_column_browse_results_renderer.tabs;
let tabs = contents.two_column_browse_results_renderer.contents;
if tabs.is_empty() {
return Err(ExtractionError::ContentUnavailable(
"channel not found".into(),

View file

@ -269,7 +269,7 @@ fn map_artist_page(
}
}
let mapped = mapper.group_items();
let mut mapped = mapper.group_items();
static WIKIPEDIA_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\(?https://[a-z\d-]+\.wikipedia.org/wiki/[^\s]+").unwrap());
@ -302,9 +302,10 @@ fn map_artist_page(
description: header.description,
wikipedia_url,
subscriber_count: header.subscription_button.and_then(|btn| {
util::parse_large_numstr(
util::parse_large_numstr_or_warn(
&btn.subscribe_button_renderer.subscriber_count_text,
lang,
&mut mapped.warnings,
)
}),
tracks: mapped.c.tracks,

View file

@ -207,22 +207,25 @@ impl MapResponse<TrackDetails> for response::MusicDetails {
response::music_item::PlaylistPanelVideo::None => None,
})
.ok_or(ExtractionError::InvalidData(Cow::Borrowed("no video item")))?;
let track = map_queue_item(track_item, lang);
let mut track = map_queue_item(track_item, lang);
if track.id != id {
if track.c.id != id {
return Err(ExtractionError::WrongResult(format!(
"got wrong video id {}, expected {}",
track.id, id
track.c.id, id
)));
}
let mut warnings = content.contents.warnings;
warnings.append(&mut track.warnings);
Ok(MapResult {
c: TrackDetails {
track,
track: track.c,
lyrics_id,
related_id,
},
warnings: content.contents.warnings,
warnings,
})
}
}
@ -251,13 +254,17 @@ impl MapResponse<Paginator<TrackItem>> for response::MusicDetails {
.content
.playlist_panel_renderer;
let mut warnings = content.contents.warnings;
let tracks = content
.contents
.c
.into_iter()
.filter_map(|item| match item {
response::music_item::PlaylistPanelVideo::PlaylistPanelVideoRenderer(item) => {
Some(map_queue_item(item, lang))
let mut track = map_queue_item(item, lang);
warnings.append(&mut track.warnings);
Some(track.c)
}
response::music_item::PlaylistPanelVideo::None => None,
})
@ -277,7 +284,7 @@ impl MapResponse<Paginator<TrackItem>> for response::MusicDetails {
None,
crate::model::paginator::ContinuationEndpoint::MusicNext,
),
warnings: content.contents.warnings,
warnings,
})
}
}

View file

@ -81,7 +81,7 @@ impl MapResponse<Vec<MusicGenreItem>> for response::MusicGenres {
let genres = content_iter
.enumerate()
.flat_map(|(i, grid)| {
let mut grid = grid.grid_renderer.items;
let mut grid = grid.grid_renderer.contents;
warnings.append(&mut grid.warnings);
grid.c.into_iter().filter_map(move |section| match section {
response::music_genres::NavigationButton::MusicNavigationButtonRenderer(

View file

@ -4,7 +4,7 @@ use crate::{
error::{Error, ExtractionError},
model::{paginator::Paginator, AlbumId, ChannelId, MusicAlbum, MusicPlaylist, TrackItem},
serializer::MapResult,
util::{self, TryRemove},
util::{self, TryRemove, DOT_SEPARATOR},
};
use super::{
@ -160,14 +160,19 @@ impl MapResponse<MusicPlaylist> for response::MusicPlaylist {
.try_swap_remove(0)
.map(|cont| cont.next_continuation_data.continuation);
let track_count = match ctoken {
Some(_) => self.header.as_ref().and_then(|h| {
h.music_detail_header_renderer
let track_count = if ctoken.is_some() {
self.header.as_ref().and_then(|h| {
let parts = h
.music_detail_header_renderer
.second_subtitle
.first()
.and_then(|txt| util::parse_numeric::<u64>(txt).ok())
}),
None => Some(map_res.c.len() as u64),
.split(|p| p == DOT_SEPARATOR)
.collect::<Vec<_>>();
parts
.get(if parts.len() > 2 { 1 } else { 0 })
.and_then(|txt| util::parse_numeric::<u64>(&txt[0]).ok())
})
} else {
Some(map_res.c.len() as u64)
};
let related_ctoken = music_contents
@ -179,11 +184,7 @@ impl MapResponse<MusicPlaylist> for response::MusicPlaylist {
Some(header) => {
let h = header.music_detail_header_renderer;
let from_ytm = h
.subtitle
.0
.iter()
.any(|c| c.as_str() == util::YT_MUSIC_NAME);
let from_ytm = h.subtitle.0.iter().any(util::is_ytm);
let channel = h
.subtitle
.0

View file

@ -157,7 +157,9 @@ impl MapResponse<Paginator<MusicItem>> for response::MusicContinuation {
mapper.add_warnings(&mut panel.contents.warnings);
panel.contents.c.into_iter().for_each(|item| {
if let PlaylistPanelVideo::PlaylistPanelVideoRenderer(item) = item {
mapper.add_item(MusicItem::Track(map_queue_item(item, lang)))
let mut track = map_queue_item(item, lang);
mapper.add_item(MusicItem::Track(track.c));
mapper.add_warnings(&mut track.warnings);
}
});
}

View file

@ -5,8 +5,7 @@ use time::OffsetDateTime;
use crate::{
error::{Error, ExtractionError},
model::{paginator::Paginator, ChannelId, Playlist, PlaylistVideo},
timeago,
util::{self, TryRemove},
util::{self, timeago, TryRemove},
};
use super::{response, ClientType, MapResponse, MapResult, QBrowse, QContinuation, RustyPipeQuery};
@ -94,7 +93,7 @@ impl MapResponse<Playlist> for response::Playlist {
let (thumbnails, last_update_txt) = match self.sidebar {
Some(sidebar) => {
let mut sidebar_items = sidebar.playlist_sidebar_renderer.items;
let mut sidebar_items = sidebar.playlist_sidebar_renderer.contents;
let mut primary =
sidebar_items
.try_swap_remove(0)

View file

@ -3,7 +3,7 @@ use serde_with::{rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkip
use super::{
video_item::YouTubeListRenderer, Alert, ChannelBadge, ContentsRenderer, ResponseContext,
Thumbnails,
Thumbnails, TwoColumnBrowseResults,
};
use crate::serializer::text::Text;
@ -22,21 +22,7 @@ pub(crate) struct Channel {
pub response_context: ResponseContext,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Contents {
pub two_column_browse_results_renderer: TabsRenderer,
}
/// YouTube channel tab view. Contains multiple tabs
/// (Home, Videos, Playlists, About...). We can ignore unknown tabs.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct TabsRenderer {
#[serde_as(as = "VecSkipError<_>")]
pub tabs: Vec<TabRendererWrap>,
}
pub(crate) type Contents = TwoColumnBrowseResults<TabRendererWrap>;
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]

View file

@ -47,12 +47,17 @@ pub(crate) mod channel_rss;
#[cfg(feature = "rss")]
pub(crate) use channel_rss::ChannelRss;
use serde::Deserialize;
use std::borrow::Cow;
use std::marker::PhantomData;
use serde::{
de::{IgnoredAny, Visitor},
Deserialize,
};
use serde_with::{json::JsonString, serde_as, VecSkipError};
use crate::error::ExtractionError;
use crate::serializer::MapResult;
use crate::serializer::{text::Text, VecLogError};
use crate::serializer::{text::Text, MapResult, VecSkipErrorWrap};
use self::video_item::YouTubeListRenderer;
@ -62,13 +67,17 @@ pub(crate) struct ContentRenderer<T> {
pub content: T,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
#[derive(Debug)]
pub(crate) struct ContentsRenderer<T> {
#[serde(alias = "tabs")]
pub contents: Vec<T>,
}
/// Generic renderer wrapper whose item list is deserialized into a [`MapResult`].
///
/// The list is read from the `contents` key; `items` is accepted as an alias.
#[derive(Debug, Deserialize)]
pub(crate) struct ContentsRendererLogged<T> {
/// Deserialized items (`c`) together with the warnings collected alongside them.
#[serde(alias = "items")]
pub contents: MapResult<Vec<T>>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Tab<T> {
@ -81,6 +90,12 @@ pub(crate) struct SectionList<T> {
pub section_list_renderer: ContentsRenderer<T>,
}
/// Generic wrapper for responses that nest their item list under the
/// `twoColumnBrowseResultsRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct TwoColumnBrowseResults<T> {
/// Inner renderer holding the list of `T` items.
pub two_column_browse_results_renderer: ContentsRenderer<T>,
}
#[derive(Default, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ThumbnailsWrap {
@ -207,11 +222,9 @@ pub(crate) struct ContinuationActionWrap {
pub append_continuation_items_action: ContinuationAction,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ContinuationAction {
#[serde_as(as = "VecLogError<_>")]
pub continuation_items: MapResult<Vec<YouTubeListItem>>,
}
@ -248,9 +261,53 @@ pub(crate) struct ErrorResponseContent {
pub message: String,
}
/*
#MAPPING
*/
// DESERIALIZER
impl<'de, T> Deserialize<'de> for ContentsRenderer<T>
where
T: Deserialize<'de>,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct ItemVisitor<T>(PhantomData<T>);
impl<'de, T> Visitor<'de> for ItemVisitor<T>
where
T: Deserialize<'de>,
{
type Value = ContentsRenderer<T>;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("map")
}
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
where
A: serde::de::MapAccess<'de>,
{
let mut contents = None;
while let Some(k) = map.next_key::<Cow<'de, str>>()? {
if k == "contents" || k == "tabs" || k == "items" {
contents = Some(ContentsRenderer {
contents: map.next_value::<VecSkipErrorWrap<T>>()?.0,
});
} else {
map.next_value::<IgnoredAny>()?;
}
}
contents.ok_or(serde::de::Error::missing_field("contents"))
}
}
deserializer.deserialize_map(ItemVisitor(PhantomData::<T>))
}
}
// MAPPING
impl From<Thumbnail> for crate::model::Thumbnail {
fn from(tn: Thumbnail) -> Self {

View file

@ -1,12 +1,12 @@
use serde::Deserialize;
use serde_with::{rust::deserialize_ignore_any, serde_as};
use crate::serializer::{text::Text, MapResult, VecLogError};
use crate::serializer::text::Text;
use super::{
music_item::{ItemSection, SimpleHeader, SingleColumnBrowseResult},
url_endpoint::BrowseEndpointWrap,
SectionList, Tab,
ContentsRendererLogged, SectionList, Tab,
};
#[derive(Debug, Deserialize)]
@ -18,15 +18,7 @@ pub(crate) struct MusicGenres {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Grid {
pub grid_renderer: GridRenderer,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct GridRenderer {
#[serde_as(as = "VecLogError<_>")]
pub items: MapResult<Vec<NavigationButton>>,
pub grid_renderer: ContentsRendererLogged<NavigationButton>,
}
#[derive(Debug, Deserialize)]

View file

@ -9,7 +9,7 @@ use crate::{
param::Language,
serializer::{
text::{Text, TextComponents},
MapResult, VecLogError,
MapResult,
},
util::{self, dictionary, TryRemove},
};
@ -39,7 +39,6 @@ pub(crate) enum ItemSection {
pub(crate) struct MusicShelf {
/// Playlist ID (only for playlists)
pub playlist_id: Option<String>,
#[serde_as(as = "VecLogError<_>")]
pub contents: MapResult<Vec<MusicResponseItem>>,
/// Continuation token for fetching more (>100) playlist items
#[serde(default)]
@ -53,12 +52,10 @@ pub(crate) struct MusicShelf {
/// MusicCarouselShelf represents a horizontal list of music items displayed with
/// large covers.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct MusicCarouselShelf {
pub header: Option<MusicCarouselShelfHeader>,
#[serde_as(as = "VecLogError<_>")]
pub contents: MapResult<Vec<MusicResponseItem>>,
}
@ -76,7 +73,6 @@ pub(crate) struct MusicCardShelf {
#[serde(default)]
pub thumbnail: MusicThumbnailRenderer,
#[serde(default)]
#[serde_as(as = "VecLogError<_>")]
pub contents: MapResult<Vec<MusicResponseItem>>,
}
@ -227,7 +223,6 @@ pub(crate) struct CoverMusicItem {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PlaylistPanelRenderer {
#[serde_as(as = "VecLogError<_>")]
pub contents: MapResult<Vec<PlaylistPanelVideo>>,
/// Continuation token for fetching more radio items
#[serde(default)]
@ -362,15 +357,7 @@ pub(crate) struct ButtonRenderer {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct MusicItemMenu {
pub menu_renderer: MusicItemMenuRenderer,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct MusicItemMenuRenderer {
#[serde_as(as = "VecSkipError<_>")]
pub items: Vec<MusicItemMenuEntry>,
pub menu_renderer: ContentsRenderer<MusicItemMenuEntry>,
}
#[derive(Debug, Deserialize)]
@ -385,11 +372,9 @@ pub(crate) struct Grid {
pub grid_renderer: GridRenderer,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct GridRenderer {
#[serde_as(as = "VecLogError<_>")]
pub items: MapResult<Vec<MusicResponseItem>>,
pub header: Option<GridHeader>,
}
@ -587,7 +572,9 @@ impl MusicListMapper {
(subtitle_parts.rev().next(), None, None)
} else {
// Skip first part (track type)
if subtitle_parts.len() > 3 {
if subtitle_parts.len() > 3
|| (is_video && subtitle_parts.len() == 2)
{
subtitle_parts.next();
}
@ -618,7 +605,11 @@ impl MusicListMapper {
(FlexColumnDisplayStyle::TwoLines, true) => (
None,
album_p.and_then(|p| {
util::parse_large_numstr(p.first_str(), self.lang)
util::parse_large_numstr_or_warn(
p.first_str(),
self.lang,
&mut self.warnings,
)
}),
),
(_, false) => (
@ -692,7 +683,11 @@ impl MusicListMapper {
match page_type {
MusicPageType::Artist => {
let subscriber_count = subtitle_p2.and_then(|p| {
util::parse_large_numstr(p.first_str(), self.lang)
util::parse_large_numstr_or_warn(
p.first_str(),
self.lang,
&mut self.warnings,
)
});
self.items.push(MusicItem::Artist(ArtistItem {
@ -736,7 +731,8 @@ impl MusicListMapper {
let from_ytm = channel_p
.as_ref()
.map(|p| p.first_str() == util::YT_MUSIC_NAME)
.and_then(|p| p.0.first())
.map(util::is_ytm)
.unwrap_or_default();
let channel = channel_p.and_then(|p| {
p.0.into_iter().find_map(|c| ChannelId::try_from(c).ok())
@ -792,7 +788,11 @@ impl MusicListMapper {
artists,
album: None,
view_count: subtitle_p2.and_then(|c| {
util::parse_large_numstr(c.first_str(), self.lang)
util::parse_large_numstr_or_warn(
c.first_str(),
self.lang,
&mut self.warnings,
)
}),
is_video,
track_nr: None,
@ -801,8 +801,13 @@ impl MusicListMapper {
Ok(Some(MusicItemType::Track))
}
MusicPageType::Artist => {
let subscriber_count = subtitle_p1
.and_then(|p| util::parse_large_numstr(p.first_str(), self.lang));
let subscriber_count = subtitle_p1.and_then(|p| {
util::parse_large_numstr_or_warn(
p.first_str(),
self.lang,
&mut self.warnings,
)
});
self.items.push(MusicItem::Artist(ArtistItem {
id,
@ -868,7 +873,8 @@ impl MusicListMapper {
// (featured on the startpage or in genres)
let from_ytm = subtitle_p2
.as_ref()
.map(|p| p.first_str() == util::YT_MUSIC_NAME)
.and_then(|p| p.0.first())
.map(util::is_ytm)
.unwrap_or(true);
let channel = subtitle_p2.and_then(|p| {
p.0.into_iter().find_map(|c| ChannelId::try_from(c).ok())
@ -927,8 +933,13 @@ impl MusicListMapper {
let item_type = match card.on_tap.music_page() {
Some((page_type, id)) => match page_type {
MusicPageType::Artist => {
let subscriber_count = subtitle_p2
.and_then(|p| util::parse_large_numstr(p.first_str(), self.lang));
let subscriber_count = subtitle_p2.and_then(|p| {
util::parse_large_numstr_or_warn(
p.first_str(),
self.lang,
&mut self.warnings,
)
});
self.items.push(MusicItem::Artist(ArtistItem {
id,
@ -963,8 +974,13 @@ impl MusicListMapper {
let (album, view_count) = if is_video {
(
None,
subtitle_p3
.and_then(|p| util::parse_large_numstr(p.first_str(), self.lang)),
subtitle_p3.and_then(|p| {
util::parse_large_numstr_or_warn(
p.first_str(),
self.lang,
&mut self.warnings,
)
}),
)
} else {
(
@ -993,7 +1009,8 @@ impl MusicListMapper {
MusicPageType::Playlist => {
let from_ytm = subtitle_p2
.as_ref()
.map(|p| p.first_str() == util::YT_MUSIC_NAME)
.and_then(|p| p.0.first())
.map(util::is_ytm)
.unwrap_or(true);
let channel = subtitle_p2
.and_then(|p| p.0.into_iter().find_map(|c| ChannelId::try_from(c).ok()));
@ -1118,7 +1135,7 @@ fn map_artist_id_fallback(
menu: Option<MusicItemMenu>,
fallback_artist: Option<&ArtistId>,
) -> Option<String> {
menu.and_then(|m| map_artist_id(m.menu_renderer.items))
menu.and_then(|m| map_artist_id(m.menu_renderer.contents))
.or_else(|| fallback_artist.and_then(|a| a.id.to_owned()))
}
@ -1149,7 +1166,8 @@ pub(crate) fn map_album_type(txt: &str, lang: Language) -> AlbumType {
.unwrap_or_default()
}
pub(crate) fn map_queue_item(item: QueueMusicItem, lang: Language) -> TrackItem {
pub(crate) fn map_queue_item(item: QueueMusicItem, lang: Language) -> MapResult<TrackItem> {
let mut warnings = Vec::new();
let mut subtitle_parts = item.long_byline_text.split(util::DOT_SEPARATOR).into_iter();
let is_video = !item
@ -1167,7 +1185,8 @@ pub(crate) fn map_queue_item(item: QueueMusicItem, lang: Language) -> TrackItem
let (album, view_count) = if is_video {
(
None,
subtitle_p2.and_then(|p| util::parse_large_numstr(p.first_str(), lang)),
subtitle_p2
.and_then(|p| util::parse_large_numstr_or_warn(p.first_str(), lang, &mut warnings)),
)
} else {
(
@ -1176,20 +1195,23 @@ pub(crate) fn map_queue_item(item: QueueMusicItem, lang: Language) -> TrackItem
)
};
TrackItem {
id: item.video_id,
name: item.title,
duration: item
.length_text
.and_then(|txt| util::parse_video_length(&txt)),
cover: item.thumbnail.into(),
artists,
artist_id,
album,
view_count,
is_video,
track_nr: None,
by_va,
MapResult {
c: TrackItem {
id: item.video_id,
name: item.title,
duration: item
.length_text
.and_then(|txt| util::parse_video_length(&txt)),
cover: item.thumbnail.into(),
artists,
artist_id,
album,
view_count,
is_video,
track_nr: None,
by_va,
},
warnings,
}
}

View file

@ -58,6 +58,8 @@ pub(crate) struct HeaderRenderer {
/// Missing on artist_tracks view.
///
/// `"64 songs", " • ", "3 hours, 40 minutes"`
///
/// `"1B views", " • ", "200 songs", " • ", "6+ hours"`
#[serde(default)]
#[serde_as(as = "Text")]
pub second_subtitle: Vec<String>,

View file

@ -5,7 +5,7 @@ use serde_with::serde_as;
use serde_with::{json::JsonString, DefaultOnError};
use super::{ResponseContext, Thumbnails};
use crate::serializer::{text::Text, MapResult, VecLogError};
use crate::serializer::{text::Text, MapResult};
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
@ -75,10 +75,8 @@ pub(crate) struct StreamingData {
#[serde_as(as = "JsonString")]
pub expires_in_seconds: u32,
#[serde(default)]
#[serde_as(as = "VecLogError<_>")]
pub formats: MapResult<Vec<Format>>,
#[serde(default)]
#[serde_as(as = "VecLogError<_>")]
pub adaptive_formats: MapResult<Vec<Format>>,
/// Only on livestreams
pub dash_manifest_url: Option<String>,

View file

@ -3,20 +3,22 @@ use serde_with::{
json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError,
};
use crate::serializer::text::{Text, TextComponent};
use crate::serializer::{MapResult, VecLogError};
use crate::serializer::{
text::{Text, TextComponent},
MapResult,
};
use crate::util::MappingError;
use super::{
Alert, ContentsRenderer, ContinuationEndpoint, ResponseContext, SectionList, Tab, Thumbnails,
ThumbnailsWrap,
ThumbnailsWrap, TwoColumnBrowseResults,
};
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Playlist {
pub contents: Option<Contents>,
pub contents: Option<TwoColumnBrowseResults<Tab<SectionList<ItemSection>>>>,
pub header: Option<Header>,
pub sidebar: Option<Sidebar>,
#[serde_as(as = "Option<DefaultOnError>")]
@ -33,12 +35,6 @@ pub(crate) struct PlaylistCont {
pub on_response_received_actions: Vec<OnResponseReceivedAction>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Contents {
pub two_column_browse_results_renderer: ContentsRenderer<Tab<SectionList<ItemSection>>>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ItemSection {
@ -51,11 +47,9 @@ pub(crate) struct PlaylistVideoListRenderer {
pub playlist_video_list_renderer: PlaylistVideoList,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PlaylistVideoList {
#[serde_as(as = "VecLogError<_>")]
pub contents: MapResult<Vec<PlaylistItem>>,
}
@ -108,15 +102,7 @@ pub(crate) struct BylineRenderer {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Sidebar {
pub playlist_sidebar_renderer: SidebarRenderer,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct SidebarRenderer {
#[serde_as(as = "VecSkipError<_>")]
pub items: Vec<SidebarItemPrimary>,
pub playlist_sidebar_renderer: ContentsRenderer<SidebarItemPrimary>,
}
#[derive(Debug, Deserialize)]
@ -199,10 +185,8 @@ pub(crate) struct OnResponseReceivedAction {
pub append_continuation_items_action: AppendAction,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AppendAction {
#[serde_as(as = "VecLogError<_>")]
pub continuation_items: MapResult<Vec<PlaylistItem>>,
}

View file

@ -1,4 +1,7 @@
use serde::{de::IgnoredAny, Deserialize};
use serde::{
de::{IgnoredAny, Visitor},
Deserialize,
};
use serde_with::{json::JsonString, serde_as};
use super::{video_item::YouTubeListRendererWrap, ResponseContext};
@ -26,8 +29,40 @@ pub(crate) struct TwoColumnSearchResultsRenderer {
}
#[derive(Debug, Deserialize)]
pub(crate) struct SearchSuggestion(
IgnoredAny,
pub Vec<(String, IgnoredAny, IgnoredAny)>,
IgnoredAny,
);
pub(crate) struct SearchSuggestion(IgnoredAny, pub Vec<SearchSuggestionItem>, IgnoredAny);
#[derive(Debug)]
pub(crate) struct SearchSuggestionItem(pub String);
impl<'de> Deserialize<'de> for SearchSuggestionItem {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct ItemVisitor;
impl<'de> Visitor<'de> for ItemVisitor {
type Value = SearchSuggestionItem;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("search suggestion item")
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: serde::de::SeqAccess<'de>,
{
match seq.next_element::<String>()? {
Some(s) => {
// Ignore the rest of the list
while seq.next_element::<IgnoredAny>()?.is_some() {}
Ok(SearchSuggestionItem(s))
}
None => Err(serde::de::Error::invalid_length(0, &"1")),
}
}
}
deserializer.deserialize_seq(ItemVisitor)
}
}

View file

@ -1,7 +1,6 @@
use serde::Deserialize;
use serde_with::{serde_as, VecSkipError};
use super::{video_item::YouTubeListRendererWrap, ResponseContext, Tab};
use super::{video_item::YouTubeListRendererWrap, ResponseContext, Tab, TwoColumnBrowseResults};
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
@ -16,16 +15,4 @@ pub(crate) struct Trending {
pub contents: Contents,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Contents {
pub two_column_browse_results_renderer: BrowseResults,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct BrowseResults {
#[serde_as(as = "VecSkipError<_>")]
pub tabs: Vec<Tab<YouTubeListRendererWrap>>,
}
type Contents = TwoColumnBrowseResults<Tab<YouTubeListRendererWrap>>;

View file

@ -6,21 +6,20 @@ use serde_with::{rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkip
use crate::serializer::text::TextComponent;
use crate::serializer::{
text::{AccessibilityText, AttributedText, Text, TextComponents},
MapResult, VecLogError,
MapResult,
};
use super::{
url_endpoint::BrowseEndpointWrap, ContinuationEndpoint, ContinuationItemRenderer, Icon,
MusicContinuationData, Thumbnails,
};
use super::{ChannelBadge, ResponseContext, YouTubeListItem};
use super::{ChannelBadge, ContentsRendererLogged, ResponseContext, YouTubeListItem};
/*
#VIDEO DETAILS
*/
/// Video details response
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct VideoDetails {
@ -29,7 +28,6 @@ pub(crate) struct VideoDetails {
/// Video ID
pub current_video_endpoint: Option<CurrentVideoEndpoint>,
/// Video chapters + comment section
#[serde_as(as = "VecLogError<_>")]
pub engagement_panels: MapResult<Vec<EngagementPanel>>,
pub response_context: ResponseContext,
}
@ -60,11 +58,9 @@ pub(crate) struct VideoResultsWrap {
}
/// Video metadata items
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct VideoResults {
#[serde_as(as = "Option<VecLogError<_>>")]
pub contents: Option<MapResult<Vec<VideoResultsItem>>>,
}
@ -303,7 +299,6 @@ pub(crate) struct RecommendationResultsWrap {
#[serde(rename_all = "camelCase")]
pub(crate) struct RecommendationResults {
/// Can be `None` for age-restricted videos
#[serde_as(as = "Option<VecLogError<_>>")]
pub results: Option<MapResult<Vec<YouTubeListItem>>>,
#[serde_as(as = "Option<VecSkipError<_>>")]
pub continuations: Option<Vec<MusicContinuationData>>,
@ -341,16 +336,7 @@ pub(crate) enum EngagementPanelRenderer {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ChapterMarkersContent {
pub macro_markers_list_renderer: MacroMarkersListRenderer,
}
/// Chapter markers
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct MacroMarkersListRenderer {
#[serde_as(as = "VecLogError<_>")]
pub contents: MapResult<Vec<MacroMarkersListItem>>,
pub macro_markers_list_renderer: ContentsRendererLogged<MacroMarkersListItem>,
}
/// Chapter marker
@ -436,7 +422,6 @@ pub(crate) struct CommentItemSectionHeaderMenuItem {
*/
/// Video comments continuation response
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct VideoComments {
@ -450,7 +435,6 @@ pub(crate) struct VideoComments {
/// - Comment replies: appendContinuationItemsAction
/// - n*commentRenderer, continuationItemRenderer:
/// replies + continuation
#[serde_as(as = "VecLogError<_>")]
pub on_response_received_endpoints: MapResult<Vec<CommentsContItem>>,
}
@ -463,11 +447,9 @@ pub(crate) struct CommentsContItem {
}
/// Video comments continuation action
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AppendComments {
#[serde_as(as = "VecLogError<_>")]
pub continuation_items: MapResult<Vec<CommentListItem>>,
}
@ -536,6 +518,8 @@ pub(crate) struct CommentRenderer {
pub author_comment_badge: Option<AuthorCommentBadge>,
#[serde(default)]
pub reply_count: u64,
#[serde_as(as = "Option<Text>")]
pub vote_count: Option<String>,
/// Buttons for comment interaction (Like/Dislike/Reply)
pub action_buttons: CommentActionButtons,
}
@ -581,7 +565,6 @@ pub(crate) struct CommentActionButtons {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct CommentActionButtonsRenderer {
pub like_button: ToggleButtonWrap,
pub creator_heart: Option<CreatorHeart>,
}

View file

@ -4,7 +4,7 @@ use serde::Deserialize;
use serde_with::{
json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError,
};
use time::{Duration, OffsetDateTime};
use time::OffsetDateTime;
use super::{url_endpoint::NavigationEndpoint, ChannelBadge, ContinuationEndpoint, Thumbnails};
use crate::{
@ -15,10 +15,9 @@ use crate::{
param::Language,
serializer::{
text::{AccessibilityText, Text, TextComponent},
MapResult, VecLogError,
MapResult,
},
timeago,
util::{self, TryRemove},
util::{self, timeago, TryRemove},
};
#[serde_as]
@ -69,7 +68,6 @@ pub(crate) enum YouTubeListItem {
#[serde(alias = "expandedShelfContentsRenderer", alias = "gridRenderer")]
ItemSectionRenderer {
#[serde(alias = "items")]
#[serde_as(as = "VecLogError<_>")]
contents: MapResult<Vec<YouTubeListItem>>,
},
@ -206,11 +204,9 @@ pub(crate) struct YouTubeListRendererWrap {
pub section_list_renderer: YouTubeListRenderer,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct YouTubeListRenderer {
#[serde_as(as = "VecLogError<_>")]
pub contents: MapResult<Vec<YouTubeListItem>>,
}
@ -415,7 +411,7 @@ impl<T> YouTubeListMapper<T> {
}
}
pub fn with_channel<C>(lang: Language, channel: &Channel<C>) -> Self {
pub fn with_channel<C>(lang: Language, channel: &Channel<C>, warnings: Vec<String>) -> Self {
Self {
lang,
channel: Some(ChannelTag {
@ -426,7 +422,7 @@ impl<T> YouTubeListMapper<T> {
subscriber_count: channel.subscriber_count,
}),
items: Vec::new(),
warnings: Vec::new(),
warnings,
ctoken: None,
corrected_query: None,
channel_info: None,
@ -505,8 +501,11 @@ impl<T> YouTubeListMapper<T> {
length: video.accessibility.and_then(|acc| {
ACCESSIBILITY_SEP_REGEX.captures(&acc).and_then(|cap| {
cap.get(1).and_then(|c| {
timeago::parse_timeago_or_warn(self.lang, c.as_str(), &mut self.warnings)
.map(|ta| Duration::from(ta).whole_seconds() as u32)
timeago::parse_video_duration_or_warn(
self.lang,
c.as_str(),
&mut self.warnings,
)
})
})
}),
@ -518,7 +517,7 @@ impl<T> YouTubeListMapper<T> {
publish_date_txt: pub_date_txt,
view_count: video
.view_count_text
.map(|txt| util::parse_large_numstr(&txt, lang).unwrap_or_default()),
.and_then(|txt| util::parse_large_numstr_or_warn(&txt, lang, &mut self.warnings)),
is_live: false,
is_short: true,
is_upcoming: false,
@ -572,10 +571,12 @@ impl<T> YouTubeListMapper<T> {
name: channel.title,
avatar: channel.thumbnail.into(),
verification: channel.owner_badges.into(),
subscriber_count: sc_txt
.and_then(|txt| util::parse_numeric_or_warn(&txt, &mut self.warnings)),
video_count: vc_text
.and_then(|txt| util::parse_numeric_or_warn(&txt, &mut self.warnings)),
subscriber_count: sc_txt.and_then(|txt| {
util::parse_large_numstr_or_warn(&txt, self.lang, &mut self.warnings)
}),
video_count: vc_text.and_then(|txt| {
util::parse_large_numstr_or_warn(&txt, self.lang, &mut self.warnings)
}),
short_description: channel.description_snippet,
}
}

View file

@ -22,7 +22,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(582),
subscriber_count: Some(582000),
video_count: None,
short_description: "Music Submissions: https://monstafluff.edmdistrict.com/",
)),
@ -42,7 +42,7 @@ SearchResult(
),
],
verification: Artist,
subscriber_count: Some(403),
subscriber_count: Some(4030000),
video_count: None,
short_description: "Welcome to the official Music Travel Love YouTube channel! We travel the world making music, friends, videos and memories!",
)),
@ -62,7 +62,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(167),
subscriber_count: Some(167000),
video_count: None,
short_description: "MUSIC IN HARMONY WITH YOUR LIFE!!! If any producer, label, artist or photographer has an issue with any of the music or\u{a0}...",
)),
@ -82,7 +82,7 @@ SearchResult(
),
],
verification: Artist,
subscriber_count: Some(411),
subscriber_count: Some(411000),
video_count: None,
short_description: "The official YouTube channel of HAEVN Music. Receiving a piano from his grandfather had a great impact on Jorrit\'s life.",
)),
@ -102,7 +102,7 @@ SearchResult(
),
],
verification: None,
subscriber_count: Some(312),
subscriber_count: Some(31200),
video_count: None,
short_description: "Hello and welcome to \"Artemis Music\"! Music can play an effective role in helping us lead a better and more productive life.",
)),
@ -122,7 +122,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(372),
subscriber_count: Some(372000),
video_count: None,
short_description: "Music is the only language in which you cannot say a mean or sarcastic thing. Have fun listening to music.",
)),
@ -142,7 +142,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(178),
subscriber_count: Some(178000),
video_count: None,
short_description: "S!X - Music is an independent Hip-Hop label. Soundcloud : https://soundcloud.com/s1xmusic Facebook\u{a0}...",
)),
@ -162,7 +162,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(104),
subscriber_count: Some(1040000),
video_count: None,
short_description: "Welcome to Shake Music, a Trap & Bass Channel / Record Label dedicated to bringing you the best tracks. All tracks on Shake\u{a0}...",
)),
@ -182,7 +182,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(822),
subscriber_count: Some(822000),
video_count: None,
short_description: "Welcome to Miracle Music! On this channel you will find a wide variety of different Deep House, Tropical House, Chill Out, EDM,.",
)),
@ -202,7 +202,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(462),
subscriber_count: Some(4620000),
video_count: None,
short_description: "",
)),
@ -222,7 +222,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(105),
subscriber_count: Some(1050000),
video_count: None,
short_description: "BRINGING YOU ONLY THE BEST EDM - TRAP Submit your own track for promotion here:\u{a0}...",
)),
@ -242,7 +242,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(709),
subscriber_count: Some(709000),
video_count: None,
short_description: "Hey there! I am Mr MoMo My channel focus on Japan music, lofi, trap & bass type beat and Japanese instrumental. I mindfully\u{a0}...",
)),
@ -262,7 +262,7 @@ SearchResult(
),
],
verification: None,
subscriber_count: Some(544),
subscriber_count: Some(54400),
video_count: None,
short_description: "",
)),
@ -282,7 +282,7 @@ SearchResult(
),
],
verification: None,
subscriber_count: Some(359),
subscriber_count: Some(3590),
video_count: None,
short_description: "Welcome to our Energy Transformation Relaxing Music . This chakra music channel will focus on developing the best chakra\u{a0}...",
)),
@ -302,7 +302,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(416),
subscriber_count: Some(416000),
video_count: None,
short_description: "Nonstop Music - Home of 1h videos of your favourite songs and mixes. Nonstop Genres: Pop • Chillout • Tropical House • Deep\u{a0}...",
)),
@ -322,7 +322,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(3),
subscriber_count: Some(3000000),
video_count: None,
short_description: "Vibe Music strives to bring the best lyric videos of popular Rap & Hip Hop songs. Be sure to Subscribe to see new videos we\u{a0}...",
)),
@ -342,7 +342,7 @@ SearchResult(
),
],
verification: None,
subscriber_count: Some(120),
subscriber_count: Some(120000),
video_count: None,
short_description: "",
)),
@ -362,7 +362,7 @@ SearchResult(
),
],
verification: None,
subscriber_count: Some(817),
subscriber_count: Some(81700),
video_count: None,
short_description: "",
)),
@ -382,7 +382,7 @@ SearchResult(
),
],
verification: None,
subscriber_count: Some(53),
subscriber_count: Some(53000),
video_count: None,
short_description: "Welcome to my channel - Helios Music. I created this channel to help people have the most relaxing, refreshing and comfortable\u{a0}...",
)),
@ -402,7 +402,7 @@ SearchResult(
),
],
verification: None,
subscriber_count: Some(129),
subscriber_count: Some(129000),
video_count: None,
short_description: "Music On (UNOFFICIAL CHANNEL)",
)),

View file

@ -22,7 +22,7 @@ SearchResult(
),
],
verification: Verified,
subscriber_count: Some(292),
subscriber_count: Some(2920000),
video_count: Some(219),
short_description: "Hi, I\'m Tina, aka Doobydobap! Food is the medium I use to tell stories and connect with people who share the same passion as I\u{a0}...",
)),

View file

@ -56,7 +56,7 @@ impl MapResponse<Paginator<VideoItem>> for response::Startpage {
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
) -> Result<MapResult<Paginator<VideoItem>>, ExtractionError> {
let mut contents = self.contents.two_column_browse_results_renderer.tabs;
let mut contents = self.contents.two_column_browse_results_renderer.contents;
let grid = contents
.try_swap_remove(0)
.ok_or(ExtractionError::InvalidData(Cow::Borrowed("no contents")))?
@ -80,7 +80,7 @@ impl MapResponse<Vec<VideoItem>> for response::Trending {
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
) -> Result<MapResult<Vec<VideoItem>>, ExtractionError> {
let mut contents = self.contents.two_column_browse_results_renderer.tabs;
let mut contents = self.contents.two_column_browse_results_renderer.contents;
let items = contents
.try_swap_remove(0)
.ok_or(ExtractionError::InvalidData(Cow::Borrowed("no contents")))?

View file

@ -7,8 +7,7 @@ use crate::{
model::{paginator::Paginator, ChannelTag, Chapter, Comment, VideoDetails, VideoItem},
param::Language,
serializer::MapResult,
timeago,
util::{self, TryRemove},
util::{self, timeago, TryRemove},
};
use super::{
@ -191,9 +190,10 @@ impl MapResponse<VideoDetails> for response::VideoDetails {
};
let comment_count = comment_count_section.and_then(|s| {
util::parse_large_numstr::<u64>(
util::parse_large_numstr_or_warn::<u64>(
&s.comments_entry_point_header_renderer.comment_count,
lang,
&mut warnings,
)
});
@ -331,9 +331,9 @@ impl MapResponse<VideoDetails> for response::VideoDetails {
name: channel_name,
avatar: owner.thumbnail.into(),
verification: owner.badges.into(),
subscriber_count: owner
.subscriber_count_text
.and_then(|txt| util::parse_large_numstr(&txt, lang)),
subscriber_count: owner.subscriber_count_text.and_then(|txt| {
util::parse_large_numstr_or_warn(&txt, lang, &mut warnings)
}),
},
view_count,
like_count,
@ -505,16 +505,16 @@ fn map_comment(
}),
_ => None,
},
publish_date: timeago::parse_timeago_to_dt(lang, &c.published_time_text),
publish_date_txt: c.published_time_text,
like_count: util::parse_numeric_or_warn(
&c.action_buttons
.comment_action_buttons_renderer
.like_button
.toggle_button_renderer
.accessibility_data,
publish_date: timeago::parse_timeago_dt_or_warn(
lang,
&c.published_time_text,
&mut warnings,
),
publish_date_txt: c.published_time_text,
like_count: match c.vote_count {
Some(txt) => util::parse_numeric_or_warn(&txt, &mut warnings),
None => Some(0),
},
reply_count: c.reply_count as u32,
replies: replies
.map(|items| Paginator::new(Some(c.reply_count), items, reply_ctoken))

View file

@ -11,5 +11,4 @@ pub mod error;
pub mod model;
pub mod param;
pub mod report;
pub mod timeago;
pub mod validate;

View file

@ -6,7 +6,7 @@ mod vec_log_err;
pub use date::DateYmd;
pub use range::Range;
pub use vec_log_err::VecLogError;
pub use vec_log_err::VecSkipErrorWrap;
use std::fmt::Debug;

View file

@ -1,10 +1,9 @@
use std::{fmt, marker::PhantomData};
use serde::{
de::{SeqAccess, Visitor},
de::{IgnoredAny, SeqAccess, Visitor},
Deserialize,
};
use serde_with::{de::DeserializeAsWrap, DeserializeAs};
use super::MapResult;
@ -13,39 +12,26 @@ use super::MapResult;
///
/// This is similar to `VecSkipError`, but it does not silently ignore
/// faulty items.
pub struct VecLogError<T>(PhantomData<T>);
impl<'de, T, U> DeserializeAs<'de, MapResult<Vec<T>>> for VecLogError<U>
impl<'de, T> Deserialize<'de> for MapResult<Vec<T>>
where
U: DeserializeAs<'de, T>,
T: Deserialize<'de>,
{
fn deserialize_as<D>(deserializer: D) -> Result<MapResult<Vec<T>>, D::Error>
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(serde::Deserialize)]
#[serde(
untagged,
bound(deserialize = "DeserializeAsWrap<T, TAs>: Deserialize<'de>")
)]
enum GoodOrError<'a, T, TAs>
where
TAs: DeserializeAs<'a, T>,
{
Good(DeserializeAsWrap<T, TAs>),
Error(serde_json::value::Value),
#[serde(skip)]
_JustAMarkerForTheLifetime(PhantomData<&'a u32>),
#[serde(untagged)]
enum GoodOrError<T> {
Good(T),
Error(serde_json::Value),
}
struct SeqVisitor<T, U> {
marker: PhantomData<T>,
marker2: PhantomData<U>,
}
struct SeqVisitor<T>(PhantomData<T>);
impl<'de, T, U> Visitor<'de> for SeqVisitor<T, U>
impl<'de, T> Visitor<'de> for SeqVisitor<T>
where
U: DeserializeAs<'de, T>,
T: Deserialize<'de>,
{
type Value = MapResult<Vec<T>>;
@ -62,16 +48,15 @@ where
while let Some(value) = seq.next_element()? {
match value {
GoodOrError::<T, U>::Good(value) => {
values.push(value.into_inner());
GoodOrError::<T>::Good(value) => {
values.push(value);
}
GoodOrError::<T, U>::Error(value) => {
GoodOrError::<T>::Error(value) => {
warnings.push(format!(
"error deserializing item: {}",
serde_json::to_string(&value).unwrap_or_default()
));
}
_ => {}
}
}
Ok(MapResult {
@ -81,43 +66,113 @@ where
}
}
let visitor = SeqVisitor::<T, U> {
marker: PhantomData,
marker2: PhantomData,
};
deserializer.deserialize_seq(visitor)
deserializer.deserialize_seq(SeqVisitor(PhantomData::<T>))
}
}
/// Wrapper around a [`Vec`] for sequences whose faulty items should be
/// skipped during deserialization.
///
/// Reimplementation of `VecSkipError` using a wrapper type
/// to allow use with generics.
///
/// The wrapped vector is public and can be taken out with `.0`.
#[derive(Debug)]
pub struct VecSkipErrorWrap<T>(pub Vec<T>);
impl<'de, T> Deserialize<'de> for VecSkipErrorWrap<T>
where
T: Deserialize<'de>,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(serde::Deserialize)]
#[serde(untagged)]
enum GoodOrError<T> {
Good(T),
Error(IgnoredAny),
}
struct SeqVisitor<T>(PhantomData<T>);
impl<'de, T> Visitor<'de> for SeqVisitor<T>
where
T: Deserialize<'de>,
{
type Value = VecSkipErrorWrap<T>;
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
formatter.write_str("a sequence")
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let mut values = Vec::with_capacity(seq.size_hint().unwrap_or_default());
while let Some(value) = seq.next_element()? {
match value {
GoodOrError::<T>::Good(value) => {
values.push(value);
}
GoodOrError::<T>::Error(_) => {}
}
}
Ok(VecSkipErrorWrap(values))
}
}
deserializer.deserialize_seq(SeqVisitor(PhantomData::<T>))
}
}
#[cfg(test)]
mod tests {
use serde::Deserialize;
use serde_with::serde_as;
use crate::serializer::MapResult;
#[serde_as]
use super::VecSkipErrorWrap;
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct S {
#[serde_as(as = "crate::serializer::VecLogError<_>")]
struct SLog {
items: MapResult<Vec<Item>>,
}
#[derive(Deserialize)]
#[allow(dead_code)]
struct SSkip {
items: VecSkipErrorWrap<Item>,
}
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct Item {
name: String,
}
#[test]
fn test() {
let json = r#"{"items": [{"name": "i1"}, {"xyz": "i2"}, {"name": "i3"}, {"namra": "i4"}]}"#;
const JSON: &str =
r#"{"items": [{"name": "i1"}, {"xyz": "i2"}, {"name": "i3"}, {"namra": "i4"}]}"#;
let res = serde_json::from_str::<S>(json).unwrap();
#[test]
fn skip_error() {
let res = serde_json::from_str::<SSkip>(JSON).unwrap();
insta::assert_debug_snapshot!(res.items.0, @r###"
[
Item {
name: "i1",
},
Item {
name: "i3",
},
]
"###);
}
#[test]
fn log_error() {
let res = serde_json::from_str::<SLog>(JSON).unwrap();
insta::assert_debug_snapshot!(res, @r###"
S {
SLog {
items: [
Item {
name: "i1",

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@ mod date;
mod protobuf;
pub mod dictionary;
pub mod timeago;
pub use date::{now_sec, shift_months, shift_years};
pub use protobuf::{string_from_pb, ProtoBuilder};
@ -19,7 +20,7 @@ use rand::Rng;
use regex::Regex;
use url::Url;
use crate::{error::Error, param::Language};
use crate::{error::Error, param::Language, serializer::text::TextComponent};
pub static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap());
pub static CHANNEL_ID_REGEX: Lazy<Regex> =
@ -34,8 +35,6 @@ pub static VANITY_PATH_REGEX: Lazy<Regex> = Lazy::new(|| {
/// Separator string for YouTube Music subtitles
pub const DOT_SEPARATOR: &str = "";
/// YouTube Music name (author of official playlists)
pub const YT_MUSIC_NAME: &str = "YouTube Music";
pub const VARIOUS_ARTISTS: &str = "Various Artists";
pub const PLAYLIST_ID_ALBUM_PREFIX: &str = "OLAK";
@ -143,7 +142,7 @@ where
/// and return the duration in seconds.
pub fn parse_video_length(text: &str) -> Option<u32> {
static VIDEO_LENGTH_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(?:(\d+):)?(\d{1,2}):(\d{2})"#).unwrap());
Lazy::new(|| Regex::new(r#"(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})"#).unwrap());
VIDEO_LENGTH_REGEX.captures(text).map(|cap| {
let hrs = cap
.get(1)
@ -272,56 +271,114 @@ impl<T> TryRemove<T> for Vec<T> {
}
}
/// Check if a channel name belongs to "YouTube Music"
/// (the author of original YouTube music playlists).
///
/// Only plain text components are considered, and only the `YouTube` prefix
/// of the text is compared, not the full name.
/// NOTE(review): presumably the prefix check exists because the rest of the
/// name is localized; confirm.
pub(crate) fn is_ytm(text: &TextComponent) -> bool {
    matches!(text, TextComponent::Text { text } if text.starts_with("YouTube"))
}
/// Check if text in the given language has to be tokenized character by
/// character instead of by whitespace-separated words.
///
/// This is the case for Japanese and the three Chinese variants.
pub fn lang_by_char(lang: Language) -> bool {
    const BY_CHAR_LANGS: [Language; 4] = [
        Language::Ja,
        Language::ZhCn,
        Language::ZhHk,
        Language::ZhTw,
    ];
    BY_CHAR_LANGS.contains(&lang)
}
/// Parse a large, textual number (e.g. `1.4M subscribers`, `22K views`)
pub fn parse_large_numstr<F>(string: &str, lang: Language) -> Option<F>
where
F: TryFrom<u64>,
{
// Special case for Gujarati: the "no views" text does not contain
// any parseable tokens: the 2 words occur in any view count text.
// This may be a translation error.
if lang == Language::Gu && string == "જોવાયાની સંખ્યા" {
return 0.try_into().ok();
}
let dict_entry = dictionary::entry(lang);
let by_char = lang_by_char(lang) || lang == Language::Ko;
let decimal_point = match dict_entry.comma_decimal {
true => ',',
false => '.',
};
let (num, mut exp, filtered) = {
let mut buf = String::new();
let mut filtered = String::new();
let mut exp = 0;
let mut after_point = false;
for c in string.chars() {
if c.is_ascii_digit() {
buf.push(c);
let mut digits = String::new();
let mut filtered = String::new();
let mut exp = 0;
let mut after_point = false;
if after_point {
exp -= 1;
}
} else if c == decimal_point {
after_point = true;
} else if !matches!(c, '\u{200b}' | '.' | ',') {
filtered.push(c);
for c in string.chars() {
if c.is_ascii_digit() {
digits.push(c);
if after_point {
exp -= 1;
}
} else if c == decimal_point {
after_point = true;
} else if !matches!(
c,
'\u{200b}' | '\u{202b}' | '\u{202c}' | '\u{202e}' | '\u{200e}' | '\u{200f}' | '.' | ','
) {
c.to_lowercase().for_each(|c| filtered.push(c));
}
(buf.parse::<u64>().ok()?, exp, filtered)
};
let lookup_token = |token: &str| match token {
"K" | "k" => Some(3),
_ => dict_entry.number_tokens.get(token).map(|t| *t as i32),
};
if dict_entry.by_char {
exp += filtered
.chars()
.filter_map(|token| lookup_token(&token.to_string()))
.sum::<i32>();
} else {
exp += filtered
.split_whitespace()
.filter_map(lookup_token)
.sum::<i32>();
}
F::try_from(num.checked_mul((10_u64).checked_pow(exp.try_into().ok()?)?)?).ok()
if digits.is_empty() {
if by_char {
filtered
.chars()
.find_map(|c| dict_entry.number_nd_tokens.get(&c.to_string()))
.and_then(|n| (*n as u64).try_into().ok())
} else {
filtered
.split_whitespace()
.find_map(|token| dict_entry.number_nd_tokens.get(token))
.and_then(|n| (*n as u64).try_into().ok())
}
} else {
let num = digits.parse::<u64>().ok()?;
let lookup_token = |token: &str| match token {
"k" => Some(3),
_ => dict_entry.number_tokens.get(token).map(|t| *t as i32),
};
if by_char {
exp += filtered
.chars()
.filter_map(|token| lookup_token(&token.to_string()))
.sum::<i32>();
} else {
exp += filtered
.split_whitespace()
.filter_map(lookup_token)
.sum::<i32>();
}
F::try_from(num.checked_mul((10_u64).checked_pow(exp.try_into().ok()?)?)?).ok()
}
}
/// Parse a large, textual number using [`parse_large_numstr`].
///
/// If the string cannot be parsed, a message containing the offending string
/// is appended to `warnings` and [`None`] is returned.
pub fn parse_large_numstr_or_warn<F>(
    string: &str,
    lang: Language,
    warnings: &mut Vec<String>,
) -> Option<F>
where
    F: TryFrom<u64>,
{
    parse_large_numstr::<F>(string, lang).or_else(|| {
        warnings.push(format!("could not parse numstr `{string}`"));
        None
    })
}
/// Replace all html control characters to make a string safe for inserting into HTML.
@ -448,23 +505,21 @@ pub(crate) mod tests {
assert_eq!(res, expect);
}
#[test]
fn t_parse_large_numstr_samples() {
let json_path = path!(*TESTFILES / "dict" / "large_number_samples.json");
let json_file = File::open(json_path).unwrap();
let number_samples: BTreeMap<Language, BTreeMap<u8, (String, u64)>> =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
number_samples.iter().for_each(|(lang, entry)| {
entry.iter().for_each(|(_, (txt, expect))| {
testcase_parse_large_numstr(txt, *lang, *expect);
});
});
// Hand-picked inputs for `parse_large_numstr` that contain more than
// ASCII digits and plain unit words.
#[rstest]
// Hebrew: the number is surrounded by bidirectional control characters
// (U+200F, U+202B, U+202C); the expected value shows they must be ignored.
#[case(
Language::Iw,
"\u{200f}\u{202b}3.36M\u{200f}\u{202c}\u{200f} \u{200f}מנויים\u{200f}",
3_360_000
)]
// Assamese: the input contains no ASCII digit at all, the count is
// written with a native digit.
#[case(Language::As, "১ জন গ্ৰাহক", 1)]
fn t_parse_large_numstr(#[case] lang: Language, #[case] string: &str, #[case] expect: u64) {
let res = parse_large_numstr::<u64>(string, lang).unwrap();
assert_eq!(res, expect);
}
#[test]
fn t_parse_large_numstr_samples2() {
let json_path = path!(*TESTFILES / "dict" / "large_number_samples_all.json");
fn t_parse_large_numstr_samples() {
let json_path = path!(*TESTFILES / "dict" / "large_number_samples.json");
let json_file = File::open(json_path).unwrap();
let number_samples: BTreeMap<Language, BTreeMap<String, u64>> =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
@ -481,12 +536,18 @@ pub(crate) mod tests {
// in the string.
let rounded = {
let n_significant_d = string.chars().filter(char::is_ascii_digit).count();
let mag = (expect as f64).log10().floor();
let factor = 10_u64.pow(1 + mag as u32 - n_significant_d as u32);
(((expect as f64) / factor as f64).floor() as u64) * factor
if n_significant_d == 0 {
expect
} else {
let mag = (expect as f64).log10().floor();
let factor = 10_u64.pow(1 + mag as u32 - n_significant_d as u32);
(((expect as f64) / factor as f64).floor() as u64) * factor
}
};
let res = parse_large_numstr::<u64>(string, lang).expect(string);
assert_eq!(res, rounded, "{string} (lang: {lang}, exact: {expect})");
let emsg = format!("{string} (lang: {lang}, exact: {expect})");
let res = parse_large_numstr::<u64>(string, lang).expect(&emsg);
assert_eq!(res, rounded, "{emsg}");
}
}

View file

@ -9,11 +9,6 @@
//!
//! This module can parse these dates using an embedded dictionary which
//! contains date/time unit tokens for all supported languages.
//!
//! Note that this module is public so it can be tested from outside
//! the crate, which is important for including new languages, too.
//!
//! It is not intended to be used to parse textual dates that are not from YouTube.
use std::ops::Mul;
@ -70,17 +65,37 @@ pub enum TimeUnit {
/// Value of a parsed TimeAgo token, used in the dictionary
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub(crate) struct TaToken {
pub struct TaToken {
pub n: u8,
pub unit: Option<TimeUnit>,
}
pub(crate) enum DateCmp {
pub enum DateCmp {
Y,
M,
D,
}
impl TimeUnit {
pub fn secs(&self) -> i64 {
match self {
TimeUnit::Second => 1,
TimeUnit::Minute => 60,
TimeUnit::Hour => 3600,
TimeUnit::Day => 24 * 3600,
TimeUnit::Week => 7 * 24 * 3600,
TimeUnit::Month => 30 * 24 * 3600,
TimeUnit::Year => 365 * 24 * 3600,
}
}
}
impl TimeAgo {
    /// Total length in seconds: the unit count `n` multiplied by the
    /// length of one unit.
    fn secs(&self) -> i64 {
        let count = i64::from(self.n);
        let unit_secs = self.unit.secs();
        count * unit_secs
    }
}
impl Mul<u8> for TimeAgo {
type Output = Self;
@ -94,15 +109,7 @@ impl Mul<u8> for TimeAgo {
impl From<TimeAgo> for Duration {
fn from(ta: TimeAgo) -> Self {
match ta.unit {
TimeUnit::Second => Duration::seconds(ta.n as i64),
TimeUnit::Minute => Duration::minutes(ta.n as i64),
TimeUnit::Hour => Duration::hours(ta.n as i64),
TimeUnit::Day => Duration::days(ta.n as i64),
TimeUnit::Week => Duration::weeks(ta.n as i64),
TimeUnit::Month => Duration::days(ta.n as i64 * 30),
TimeUnit::Year => Duration::days(ta.n as i64 * 365),
}
Duration::seconds(ta.secs())
}
}
@ -142,14 +149,19 @@ fn filter_str(string: &str) -> String {
.collect()
}
fn parse_ta_token(entry: &dictionary::Entry, nd: bool, filtered_str: &str) -> Option<TimeAgo> {
fn parse_ta_token(
entry: &dictionary::Entry,
by_char: bool,
nd: bool,
filtered_str: &str,
) -> Option<TimeAgo> {
let tokens = match nd {
true => &entry.timeago_nd_tokens,
false => &entry.timeago_tokens,
};
let mut qu = 1;
if entry.by_char {
if by_char {
filtered_str.chars().find_map(|word| {
tokens.get(&word.to_string()).and_then(|t| match t.unit {
Some(unit) => Some(TimeAgo { n: t.n * qu, unit }),
@ -172,54 +184,78 @@ fn parse_ta_token(entry: &dictionary::Entry, nd: bool, filtered_str: &str) -> Op
}
}
fn parse_textual_month(entry: &dictionary::Entry, filtered_str: &str) -> Option<u8> {
if entry.by_char {
// Chinese/Japanese dont use textual months
None
fn parse_ta_tokens(
entry: &dictionary::Entry,
by_char: bool,
nd: bool,
filtered_str: &str,
) -> Vec<TimeAgo> {
let tokens = match nd {
true => &entry.timeago_nd_tokens,
false => &entry.timeago_tokens,
};
let mut qu = 1;
if by_char {
filtered_str
.chars()
.filter_map(|word| {
tokens.get(&word.to_string()).and_then(|t| match t.unit {
Some(unit) => Some(TimeAgo { n: t.n * qu, unit }),
None => {
qu = t.n;
None
}
})
})
.collect()
} else {
filtered_str
.split_whitespace()
.find_map(|word| entry.months.get(word).copied())
.filter_map(|word| {
tokens.get(word).and_then(|t| match t.unit {
Some(unit) => Some(TimeAgo { n: t.n * qu, unit }),
None => {
qu = t.n;
None
}
})
})
.collect()
}
}
/// Look for a textual month in a filtered date string.
///
/// The string is split at whitespace; the value of the first word found in
/// `entry.months` is returned (the caller uses it as the month number).
/// Returns [`None`] if no word is a known month.
fn parse_textual_month(entry: &dictionary::Entry, filtered_str: &str) -> Option<u8> {
    for word in filtered_str.split_whitespace() {
        if let Some(month) = entry.months.get(word) {
            return Some(*month);
        }
    }
    None
}
/// Parse a TimeAgo string (e.g. "29 minutes ago") into a TimeAgo object.
///
/// Returns None if the date could not be parsed.
/// Returns [`None`] if the date could not be parsed.
pub fn parse_timeago(lang: Language, textual_date: &str) -> Option<TimeAgo> {
let entry = dictionary::entry(lang);
let filtered_str = filter_str(textual_date);
let qu: u8 = util::parse_numeric(textual_date).unwrap_or(1);
parse_ta_token(&entry, false, &filtered_str).map(|ta| ta * qu)
parse_ta_token(&entry, util::lang_by_char(lang), false, &filtered_str).map(|ta| ta * qu)
}
/// Parse a TimeAgo string (e.g. "29 minutes ago") into an [`OffsetDateTime`].
///
/// Returns None if the date could not be parsed.
pub fn parse_timeago_to_dt(lang: Language, textual_date: &str) -> Option<OffsetDateTime> {
/// Returns [`None`] if the date could not be parsed.
pub fn parse_timeago_dt(lang: Language, textual_date: &str) -> Option<OffsetDateTime> {
parse_timeago(lang, textual_date).map(|ta| ta.into())
}
pub(crate) fn parse_timeago_or_warn(
lang: Language,
textual_date: &str,
warnings: &mut Vec<String>,
) -> Option<TimeAgo> {
let res = parse_timeago(lang, textual_date);
if res.is_none() {
warnings.push(format!("could not parse timeago `{textual_date}`"));
}
res
}
pub(crate) fn parse_timeago_dt_or_warn(
pub fn parse_timeago_dt_or_warn(
lang: Language,
textual_date: &str,
warnings: &mut Vec<String>,
) -> Option<OffsetDateTime> {
let res = parse_timeago_to_dt(lang, textual_date);
let res = parse_timeago_dt(lang, textual_date);
if res.is_none() {
warnings.push(format!("could not parse timeago `{textual_date}`"));
}
@ -228,19 +264,20 @@ pub(crate) fn parse_timeago_dt_or_warn(
/// Parse a textual date (e.g. "29 minutes ago" or "Jul 2, 2014") into a ParsedDate object.
///
/// Returns None if the date could not be parsed.
/// Returns [`None`] if the date could not be parsed.
pub fn parse_textual_date(lang: Language, textual_date: &str) -> Option<ParsedDate> {
let entry = dictionary::entry(lang);
let by_char = util::lang_by_char(lang);
let filtered_str = filter_str(textual_date);
let nums = util::parse_numeric_vec::<u16>(textual_date);
match nums.len() {
0 => match parse_ta_token(&entry, true, &filtered_str) {
0 => match parse_ta_token(&entry, by_char, true, &filtered_str) {
Some(timeago) => Some(ParsedDate::Relative(timeago)),
None => parse_ta_token(&entry, false, &filtered_str).map(ParsedDate::Relative),
None => parse_ta_token(&entry, by_char, false, &filtered_str).map(ParsedDate::Relative),
},
1 => parse_ta_token(&entry, false, &filtered_str)
1 => parse_ta_token(&entry, by_char, false, &filtered_str)
.map(|timeago| ParsedDate::Relative(timeago * nums[0] as u8)),
2..=3 => {
if nums.len() == entry.date_order.len() {
@ -256,7 +293,8 @@ pub fn parse_textual_date(lang: Language, textual_date: &str) -> Option<ParsedDa
DateCmp::D => d = Some(*n),
});
if m.is_none() {
// Chinese/Japanese don't use textual months
if m.is_none() && !by_char {
m = parse_textual_month(&entry, &filtered_str).map(|n| n as u16);
}
@ -282,7 +320,7 @@ pub fn parse_textual_date_to_dt(lang: Language, textual_date: &str) -> Option<Of
parse_textual_date(lang, textual_date).map(|ta| ta.into())
}
pub(crate) fn parse_textual_date_or_warn(
pub fn parse_textual_date_or_warn(
lang: Language,
textual_date: &str,
warnings: &mut Vec<String>,
@ -294,6 +332,87 @@ pub(crate) fn parse_textual_date_or_warn(
res
}
/// Parse a textual video duration (e.g. "11 minutes, 20 seconds")
/// and return it in seconds.
///
/// Returns [`None`] if the duration could not be parsed. This is the case if
/// - the number of a segment does not fit into a `u32`,
/// - a segment contains no known time unit token, or
/// - the total duration does not fit into a `u32`.
pub fn parse_video_duration(lang: Language, video_duration: &str) -> Option<u32> {
    let entry = dictionary::entry(lang);
    let by_char = util::lang_by_char(lang);

    // Sinhala and Swahili are split with `start_c` set
    // (a segment's word is expected in front of its number).
    let parts = split_duration_txt(video_duration, matches!(lang, Language::Si | Language::Sw));

    // The text comes from a remote response, so the numbers are not trusted.
    // Sum up in an i64 with checked arithmetic: an oversized value yields
    // `None` instead of a debug-build panic or a silently wrapped result.
    let mut secs: i64 = 0;

    for part in parts {
        let mut n: i64 = if part.digits.is_empty() {
            1
        } else {
            i64::from(part.digits.parse::<u32>().ok()?)
        };
        let tokens = parse_ta_tokens(&entry, by_char, false, &part.word);
        if tokens.is_empty() {
            return None;
        }

        for ta in &tokens {
            secs = n
                .checked_mul(ta.secs())
                .and_then(|s| secs.checked_add(s))?;
            // The number only applies to the first unit token of a segment
            n = 1;
        }
    }

    if secs > i64::from(u32::MAX) {
        None
    } else {
        // Never negative (all factors are >= 0) and range-checked above
        Some(secs as u32)
    }
}
/// Parse a textual video duration using [`parse_video_duration`].
///
/// If the text cannot be parsed, a message containing the offending text
/// is appended to `warnings` and [`None`] is returned.
pub fn parse_video_duration_or_warn(
    lang: Language,
    video_duration: &str,
    warnings: &mut Vec<String>,
) -> Option<u32> {
    parse_video_duration(lang, video_duration).or_else(|| {
        warnings.push(format!("could not parse video duration `{video_duration}`"));
        None
    })
}
/// One part of a textual duration: a run of ASCII digits together with the
/// (lowercased, comma-free) text that belongs to it. Either field may be empty.
#[derive(Default)]
struct DurationTxtSegment {
    digits: String,
    word: String,
}

/// Split a textual duration into segments of digits + accompanying text.
///
/// With `start_c == false` a segment is read as "digits, then word"; a word
/// in front of the very first number becomes a segment of its own without
/// digits. With `start_c == true` a segment is read as "word, then digits";
/// a number in front of the very first word becomes a segment of its own
/// without a word.
///
/// Commas are dropped and all other non-digit characters are lowercased.
fn split_duration_txt(txt: &str, start_c: bool) -> Vec<DurationTxtSegment> {
    /// Kind of the previously consumed character
    #[derive(Clone, Copy)]
    enum Prev {
        Start,
        Digit,
        Word,
    }

    let mut out: Vec<DurationTxtSegment> = Vec::new();
    let mut current = DurationTxtSegment::default();
    let mut prev = Prev::Start;

    for ch in txt.chars() {
        let is_digit = ch.is_ascii_digit();

        // A segment ends when the character kind flips and the current
        // segment already holds the part that is about to start again
        // (or, for the first segment, when the parts come in reverse order).
        let boundary = match (prev, is_digit) {
            (Prev::Word, true) => !current.digits.is_empty() || (!start_c && out.is_empty()),
            (Prev::Digit, false) => !current.word.is_empty() || (start_c && out.is_empty()),
            _ => false,
        };
        if boundary {
            out.push(std::mem::take(&mut current));
        }

        if is_digit {
            current.digits.push(ch);
            prev = Prev::Digit;
        } else {
            if ch != ',' {
                current.word.extend(ch.to_lowercase());
            }
            prev = Prev::Word;
        }
    }

    // Flush the trailing segment unless it is completely empty
    if !(current.word.is_empty() && current.digits.is_empty()) {
        out.push(current);
    }

    out
}
#[cfg(test)]
mod tests {
use std::{collections::BTreeMap, fs::File, io::BufReader};
@ -536,6 +655,11 @@ mod tests {
"Last updated on Jun 04, 2003",
Some(ParsedDate::Absolute(date!(2003-6-4)))
)]
#[case(
Language::Bn,
"যোগ দিয়েছেন 24 সেপ, 2013",
Some(ParsedDate::Absolute(date!(2013-9-24)))
)]
fn t_parse_date(
#[case] lang: Language,
#[case] textual_date: &str,
@ -564,11 +688,7 @@ mod tests {
assert_eq!(
parse_textual_date(*lang, samples.get("Yesterday").unwrap()),
Some(ParsedDate::Relative(TimeAgo {
// YT's Singhalese translation has an error (yesterday == today)
n: match lang {
Language::Si => 0,
_ => 1,
},
n: 1,
unit: TimeUnit::Day
})),
"lang: {lang}"
@ -576,7 +696,7 @@ mod tests {
assert_eq!(
parse_textual_date(*lang, samples.get("Ago").unwrap()),
Some(ParsedDate::Relative(TimeAgo {
n: 3,
n: 5,
unit: TimeUnit::Day
})),
"lang: {lang}"
@ -644,6 +764,36 @@ mod tests {
})
}
/// Check `parse_video_duration` against the recorded duration samples
/// (text -> seconds) of every language in the sample file.
#[test]
fn t_parse_video_duration() {
    let json_path = path!(*TESTFILES / "dict" / "video_duration_samples.json");
    let reader = BufReader::new(File::open(json_path).unwrap());
    let duration_samples: BTreeMap<Language, BTreeMap<String, u32>> =
        serde_json::from_reader(reader).unwrap();

    for (lang, samples) in &duration_samples {
        for (txt, duration) in samples {
            assert_eq!(
                parse_video_duration(*lang, txt),
                Some(*duration),
                "lang: {lang}; txt: `{txt}`"
            );
        }
    }
}
// Hand-picked duration texts whose segments are not simply "number, unit".
#[rstest]
// Arabic: the seconds part has no digits (1142 = 19 * 60 + 2)
#[case(Language::Ar, "19 دقيقة وثانيتان", 1142)]
// Arabic: the minutes part has no digits and comes first (73 = 60 + 13)
#[case(Language::Ar, "دقيقة و13 ثانية", 73)]
// Swahili: the unit word precedes its number (73 = 60 + 13)
#[case(Language::Sw, "dakika 1 na sekunde 13", 73)]
fn t_parse_video_duration2(
#[case] lang: Language,
#[case] video_duration: &str,
#[case] expect: u32,
) {
assert_eq!(parse_video_duration(lang, video_duration), Some(expect));
}
#[test]
fn t_to_datetime() {
// Absolute date