fix: shorts duration parsing, playlist dates (no), number_nd_tokens (is)
This commit is contained in:
parent
b862d2d1f9
commit
ef1cdbc91a
19 changed files with 302 additions and 230 deletions
|
|
@ -208,7 +208,9 @@ pub(crate) struct CoverMusicItem {
|
|||
///
|
||||
/// `"2022"` Artist singles
|
||||
///
|
||||
/// `"Playlist", " • ", <"ThetaDev"> " • ", "26 songs"`
|
||||
/// `"Playlist", " • ", <"YouTube Music"> " • ", "53 songs"`
|
||||
///
|
||||
/// `"Playlist", " • ", <"Vevo Playlists"> " • ", "13M views"`
|
||||
///
|
||||
/// `"Playlist", " • ", "YouTube Music"` Featured on
|
||||
#[serde(default)]
|
||||
|
|
@ -737,8 +739,9 @@ impl MusicListMapper {
|
|||
let channel = channel_p.and_then(|p| {
|
||||
p.0.into_iter().find_map(|c| ChannelId::try_from(c).ok())
|
||||
});
|
||||
let track_count =
|
||||
tcount_p.and_then(|p| util::parse_numeric(p.first_str()).ok());
|
||||
let track_count = tcount_p
|
||||
.filter(|_| from_ytm)
|
||||
.and_then(|p| util::parse_numeric(p.first_str()).ok());
|
||||
|
||||
self.items.push(MusicItem::Playlist(MusicPlaylistItem {
|
||||
id,
|
||||
|
|
@ -772,7 +775,6 @@ impl MusicListMapper {
|
|||
let mut subtitle_parts = item.subtitle.split(util::DOT_SEPARATOR).into_iter();
|
||||
let subtitle_p1 = subtitle_parts.next();
|
||||
let subtitle_p2 = subtitle_parts.next();
|
||||
let subtitle_p3 = subtitle_parts.next();
|
||||
|
||||
match item.navigation_endpoint.music_page() {
|
||||
Some((page_type, id)) => match page_type {
|
||||
|
|
@ -879,15 +881,13 @@ impl MusicListMapper {
|
|||
let channel = subtitle_p2.and_then(|p| {
|
||||
p.0.into_iter().find_map(|c| ChannelId::try_from(c).ok())
|
||||
});
|
||||
let track_count =
|
||||
subtitle_p3.and_then(|p| util::parse_numeric(p.first_str()).ok());
|
||||
|
||||
self.items.push(MusicItem::Playlist(MusicPlaylistItem {
|
||||
id,
|
||||
name: item.title,
|
||||
thumbnail: item.thumbnail_renderer.into(),
|
||||
channel,
|
||||
track_count,
|
||||
track_count: None,
|
||||
from_ytm,
|
||||
}));
|
||||
Ok(Some(MusicItemType::Playlist))
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use serde::Deserialize;
|
||||
use serde_with::{
|
||||
json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError,
|
||||
|
|
@ -382,6 +384,10 @@ impl IsShort for Vec<TimeOverlay> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Separator pattern used to split the accessibility text of a video into segments.
///
/// The pattern matches any of the following alternatives:
/// - a space or no-break space (U+00A0), followed by a hyphen-minus, en dash (U+2013)
///   or em dash (U+2014), followed by a space
/// - a bare en dash (U+2013) without surrounding spaces
/// - U+055D followed by a space
/// - a double quote followed by a comma and a space
///
/// The regex is compiled lazily on first use; the `unwrap` only panics if the
/// hard-coded pattern fails to compile.
static ACCESSIBILITY_SEP_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("(?:[ \u{00a0}][-\u{2013}\u{2014}] )|\u{2013}|(?:\u{055d} )|(?:\", )").unwrap()
|
||||
});
|
||||
|
||||
/// Result of mapping a list of different YouTube entities
|
||||
/// (videos, channels, playlists)
|
||||
#[derive(Debug)]
|
||||
|
|
@ -496,14 +502,29 @@ impl<T> YouTubeListMapper<T> {
|
|||
.timestamp_text
|
||||
});
|
||||
|
||||
let length = video.accessibility.and_then(|acc| {
|
||||
let parts = ACCESSIBILITY_SEP_REGEX.split(&acc).collect::<Vec<_>>();
|
||||
if parts.len() > 2 {
|
||||
let i = match lang {
|
||||
Language::Ru => 1,
|
||||
_ => 2,
|
||||
};
|
||||
timeago::parse_video_duration_or_warn(
|
||||
self.lang,
|
||||
parts[parts.len() - i],
|
||||
&mut self.warnings,
|
||||
)
|
||||
} else {
|
||||
self.warnings
|
||||
.push(format!("could not split video duration `{acc}`"));
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
VideoItem {
|
||||
id: video.video_id,
|
||||
name: video.headline,
|
||||
length: video.accessibility.and_then(|acc| {
|
||||
acc.rsplit(" - ").nth(1).and_then(|s| {
|
||||
timeago::parse_video_duration_or_warn(self.lang, s, &mut self.warnings)
|
||||
})
|
||||
}),
|
||||
length,
|
||||
thumbnail: video.thumbnail.into(),
|
||||
channel: self.channel.clone(),
|
||||
publish_date: pub_date_txt.as_ref().and_then(|txt| {
|
||||
|
|
@ -704,3 +725,50 @@ impl YouTubeListMapper<PlaylistItem> {
|
|||
res.c.into_iter().for_each(|item| self.map_item(item));
|
||||
}
|
||||
}
|
||||
|
||||
// Tests for splitting localized accessibility texts with `ACCESSIBILITY_SEP_REGEX`.
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ACCESSIBILITY_SEP_REGEX;
|
||||
|
||||
use rstest::rstest;
|
||||
|
||||
// Each case supplies an accessibility text and the duration segment expected
// at the second-to-last position after splitting on the separator regex.
#[rstest]
|
||||
#[case::af(
|
||||
"BTS - Permission to Dance Cover #shorts #pinkfong – 50 sekondes – speel video",
|
||||
"50 sekondes"
|
||||
)]
|
||||
#[case::de(
|
||||
"Point of view: Me VS My mom #shorts – 8 Sekunden – Video wiedergeben",
|
||||
"8 Sekunden"
|
||||
)]
|
||||
// This sample has no spaces around the first en dash, which exercises the
// bare en dash alternative of the regex.
#[case::be(
|
||||
"Point of view: Me VS My mom #shorts–8 секунд – прайграць відэа",
|
||||
"8 секунд"
|
||||
)]
|
||||
#[case::fil("do u wanna get swole? - 53 segundo - i-play ang video", "53 segundo")]
|
||||
// NOTE(review): this sample looks like Armenian text (it is separated by U+055D),
// although the case is labelled `ar` — confirm the intended locale code.
#[case::ar(
|
||||
"«the holy trinity of korean street food»՝ 1 րոպե՝ նվագարկել տեսանյութը",
|
||||
"1 րոպե"
|
||||
)]
|
||||
#[case::lv(
|
||||
"what i ate in google japan — 1 minūte — atskaņot videoklipu",
|
||||
"1 minūte"
|
||||
)]
|
||||
#[case::sq("When you impulse buy... - 1 minutë - luaj videon", "1 minutë")]
|
||||
// The title is wrapped in double quotes here, so the `", ` alternative splits
// the title from the duration.
#[case::uk(
|
||||
"\"Point of view: Me VS My mom #shorts\", 8 секунд – відтворити відео",
|
||||
"8 секунд"
|
||||
)]
|
||||
// INFO: `sw` is not covered because its text cannot be split: there is no
// separator between the title and the duration,
// e.g. "coming soonsekunde 58 - cheza video"
|
||||
fn split_duration_txt(#[case] s: &str, #[case] expect: &str) {
|
||||
let parts = ACCESSIBILITY_SEP_REGEX.split(s).collect::<Vec<_>>();
|
||||
// The duration is expected in the second-to-last segment.
assert_eq!(parts[parts.len() - 2], expect);
|
||||
}
|
||||
|
||||
// The Russian sample places the duration at the end of the text, so the
// last segment is checked instead of the second-to-last one.
#[test]
|
||||
fn split_duration_txt_ru() {
|
||||
let s = "Воспроизвести видео – \"the holy trinity of korean street food\". Его продолжительность – 1 минута.";
|
||||
let parts = ACCESSIBILITY_SEP_REGEX.split(s).collect::<Vec<_>>();
|
||||
assert_eq!(parts[parts.len() - 1], "1 минута.");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Reference in a new issue