add playlist date collector

This commit is contained in:
ThetaDev 2022-09-06 01:33:43 +02:00
parent 513bf1dc9c
commit c9433d721d
14 changed files with 20408 additions and 75 deletions

File diff suppressed because it is too large Load diff

View file

@ -31,8 +31,9 @@ Throttling issue: Y8JFxS1HlDo
Playlist update dates (05.09.2022):
today: RDCLAK5uy_kj3rhiar1LINmyDcuFnXihEO0K1NQa2jI
yesterday: PL4C44E2875308A280
yesterday: PLmB6td997u3kUOrfFwkULZ910ho44oQSy
2 days ago: PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am
4 days ago: PLFgm_LJYNEttGkVLopr0a25OAySzfS941
5 days ago: PL3-sRm8xAzY9sDilvaWjCwCI0TkUzYdOG
7 days ago: PLHr0jWPfopte182N54r1ra7tkRJC1fmPu

View file

@ -1,10 +1,11 @@
use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use reqwest::Method;
use serde::Serialize;
use crate::{
model::{Channel, Playlist, Thumbnail, Video},
serializer::text::{PageType, Text, TextLink},
serializer::text::{PageType, TextLink},
util,
};
use super::{response, ClientType, ContextYT, RustyTube};
@ -41,7 +42,9 @@ impl RustyTube {
.await?
.error_for_status()?;
let playlist_response = resp.json::<response::Playlist>().await?;
let resp_body = resp.text().await?;
let playlist_response =
serde_json::from_str::<response::Playlist>(&resp_body).context(resp_body)?;
map_playlist(&playlist_response)
}
@ -120,55 +123,54 @@ fn map_playlist(response: &response::Playlist) -> Result<Playlist> {
let (videos, ctoken) = map_playlist_items(video_items);
let thumbnail_renderer = some_or_bail!(
response
.sidebar
.playlist_sidebar_renderer
.items
.iter()
.find_map(|s| match s {
response::playlist::SidebarRendererItem::PlaylistSidebarPrimaryInfoRenderer {
thumbnail_renderer,
} => Some(thumbnail_renderer),
_ => None,
}),
Err(anyhow!("no primary sidebar"))
);
let (thumbnails, last_update_txt) = match &response.sidebar {
Some(sidebar) => {
let primary = some_or_bail!(
sidebar.playlist_sidebar_renderer.items.get(0),
Err(anyhow!("no primary sidebar"))
);
let video_owner_wrap = response
.sidebar
.playlist_sidebar_renderer
.items
.iter()
.find_map(|s| match s {
response::playlist::SidebarRendererItem::PlaylistSidebarSecondaryInfoRenderer {
video_owner,
} => Some(video_owner),
_ => None,
});
let n_videos = match ctoken {
Some(_) => {
some_or_bail!(
match &response.header.playlist_header_renderer.num_videos_text {
Text::Multiple { runs } =>
if runs.len() == 2 && runs[1] == " videos" {
runs[0].replace(",", "").replace(".", "").parse().ok()
} else {
None
},
_ => None,
},
Err(anyhow!("no video count"))
(
&primary
.playlist_sidebar_primary_info_renderer
.thumbnail_renderer
.playlist_video_thumbnail_renderer
.thumbnail
.thumbnails,
primary
.playlist_sidebar_primary_info_renderer
.stats
.get(2)
.map(|t| t.to_owned()),
)
}
None => {
let header_banner = some_or_bail!(
&response
.header
.playlist_header_renderer
.playlist_header_banner,
Err(anyhow!("no thumbnail found"))
);
let last_update_txt = response
.header
.playlist_header_renderer
.byline
.get(1)
.map(|b| b.playlist_byline_renderer.text.to_owned());
(
&header_banner
.hero_playlist_thumbnail_renderer
.thumbnail
.thumbnails,
last_update_txt,
)
}
None => videos.len() as u32,
};
let thumbnails = thumbnail_renderer
.playlist_video_thumbnail_renderer
.thumbnail
.thumbnails
let thumbnails = thumbnails
.iter()
.map(|t| Thumbnail {
url: t.url.to_owned(),
@ -177,6 +179,16 @@ fn map_playlist(response: &response::Playlist) -> Result<Playlist> {
})
.collect::<Vec<_>>();
let n_videos = match ctoken {
Some(_) => {
ok_or_bail!(
util::parse_numeric(&response.header.playlist_header_renderer.num_videos_text),
Err(anyhow!("no video count"))
)
}
None => videos.len() as u32,
};
let id = response
.header
.playlist_header_renderer
@ -189,8 +201,8 @@ fn map_playlist(response: &response::Playlist) -> Result<Playlist> {
.description_text
.to_owned();
let channel = match video_owner_wrap {
Some(o) => match &o.video_owner_renderer.title {
let channel = match &response.header.playlist_header_renderer.owner_text {
Some(owner_text) => match owner_text {
TextLink::Browse {
text,
page_type,
@ -217,6 +229,7 @@ fn map_playlist(response: &response::Playlist) -> Result<Playlist> {
description,
channel,
last_update: None,
last_update_txt,
})
}

View file

@ -2,18 +2,16 @@ use serde::Deserialize;
use serde_with::serde_as;
use serde_with::{json::JsonString, DefaultOnError, VecSkipError};
use crate::serializer::text::{Text, TextLink};
use crate::serializer::text::TextLink;
use super::{
ContentRenderer, ContentsRenderer, Thumbnails, ThumbnailsWrap, VideoListItem, VideoOwner,
};
use super::{ContentRenderer, ContentsRenderer, Thumbnails, ThumbnailsWrap, VideoListItem};
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Playlist {
pub contents: Contents,
pub header: Header,
pub sidebar: Sidebar,
pub sidebar: Option<Sidebar>,
}
#[serde_as]
@ -93,8 +91,35 @@ pub struct HeaderRenderer {
#[serde(default)]
#[serde_as(as = "DefaultOnError<Option<crate::serializer::text::Text>>")]
pub description_text: Option<String>,
/// `"495", " videos"`
pub num_videos_text: Text,
#[serde_as(as = "crate::serializer::text::Text")]
pub num_videos_text: String,
#[serde_as(as = "Option<crate::serializer::text::TextLink>")]
pub owner_text: Option<TextLink>,
// Alternative layout
pub playlist_header_banner: Option<PlaylistHeaderBanner>,
#[serde(default)]
pub byline: Vec<Byline>,
}
/// Banner object of the playlist header (`playlistHeaderBanner` in `HeaderRenderer`).
///
/// The playlist mapper falls back to the thumbnails stored here when the
/// response does not contain a sidebar.
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PlaylistHeaderBanner {
/// Thumbnail container, read from the JSON key `heroPlaylistThumbnailRenderer`.
pub hero_playlist_thumbnail_renderer: ThumbnailsWrap,
}
/// One entry of the header's `byline` list.
///
/// Each entry is a wrapper object whose payload sits under the JSON key
/// `playlistBylineRenderer`.
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Byline {
/// The wrapped renderer carrying the byline text.
pub playlist_byline_renderer: BylineRenderer,
}
/// Payload of a [`Byline`] entry.
///
/// When the response has no sidebar, the playlist mapper takes the text of the
/// second byline entry as the textual "last updated" date.
#[serde_as]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct BylineRenderer {
/// Byline text, deserialized through `crate::serializer::text::Text` into a `String`.
#[serde_as(as = "crate::serializer::text::Text")]
pub text: String,
}
#[derive(Clone, Debug, Deserialize)]
@ -108,22 +133,25 @@ pub struct Sidebar {
#[serde(rename_all = "camelCase")]
pub struct SidebarRenderer {
#[serde_as(as = "VecSkipError<_>")]
pub items: Vec<SidebarRendererItem>,
pub items: Vec<SidebarItemPrimary>,
}
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum SidebarRendererItem {
#[serde(rename_all = "camelCase")]
PlaylistSidebarPrimaryInfoRenderer {
thumbnail_renderer: PlaylistThumbnailRenderer,
// - `"495", " videos"`
// - `"3,310,996 views"`
// - `"Last updated on ", "Aug 7, 2022"`
// stats: Vec<Text>,
},
#[serde(rename_all = "camelCase")]
PlaylistSidebarSecondaryInfoRenderer { video_owner: VideoOwner },
pub struct SidebarItemPrimary {
pub playlist_sidebar_primary_info_renderer: SidebarPrimaryInfoRenderer,
}
/// Primary info section of the playlist sidebar.
///
/// Provides the playlist thumbnail and the list of stat texts; the playlist
/// mapper reads the third stat (index 2) as the textual "last updated" date.
#[serde_as]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SidebarPrimaryInfoRenderer {
/// Renderer holding the playlist thumbnail.
pub thumbnail_renderer: PlaylistThumbnailRenderer,
// Stat texts, each deserialized through `crate::serializer::text::Text` into a `String`.
// Example values, in order:
// - `"495", " videos"`
// - `"3,310,996 views"`
// - `"Last updated on ", "Aug 7, 2022"`
#[serde_as(as = "Vec<crate::serializer::text::Text>")]
pub stats: Vec<String>,
}
#[derive(Clone, Debug, Deserialize)]

View file

@ -1925,4 +1925,5 @@ channel:
id: UCIekuFeMaV78xYfvpmoCnPg
name: Best Music
last_update: ~
last_update_txt: "Last updated on Aug 7, 2022"

View file

@ -1279,4 +1279,5 @@ channel:
id: UCQM0bS4_04-Y4JuYrgmnpZQ
name: Chaosflo44
last_update: ~
last_update_txt: "Last updated on Jul 2, 2014"

View file

@ -1863,4 +1863,5 @@ thumbnails:
description: ~
channel: ~
last_update: ~
last_update_txt: Updated today

View file

@ -22,7 +22,7 @@ struct QVideoCont {
}
impl RustyTube {
pub async fn get_video_response(&self, video_id: &str) -> Result<response::Video> {
async fn get_video_response(&self, video_id: &str) -> Result<response::Video> {
let client = self.get_ytclient(ClientType::Desktop);
let context = client.get_context(true).await;
let request_body = QVideo {
@ -43,7 +43,7 @@ impl RustyTube {
Ok(resp.json::<response::Video>().await?)
}
pub async fn get_comments_response(&self, ctoken: &str) -> Result<response::VideoComments> {
async fn get_comments_response(&self, ctoken: &str) -> Result<response::VideoComments> {
let client = self.get_ytclient(ClientType::Desktop);
let context = client.get_context(true).await;
let request_body = QVideoCont {
@ -62,7 +62,7 @@ impl RustyTube {
Ok(resp.json::<response::VideoComments>().await?)
}
pub async fn get_recommendations_response(
async fn get_recommendations_response(
&self,
ctoken: &str,
) -> Result<response::VideoRecommendations> {

View file

@ -0,0 +1,77 @@
#![cfg(test)]
use std::{collections::BTreeMap, fs::File, path::Path};
use serde::{Deserialize, Serialize};
use crate::{
client::RustyTube,
model::{locale::LANGUAGES, Country, Language},
};
type CollectedDates = BTreeMap<Language, BTreeMap<DateCase, String>>;
/// Kind of "last updated" text that a sample playlist is expected to show.
///
/// Used as the inner map key of `CollectedDates`. The derived `Ord` follows the
/// declaration order of the variants, so a `BTreeMap` keyed by this enum
/// iterates in that order.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
enum DateCase {
// Sample playlist that was updated on the day of collection.
Today,
// Sample playlist that was updated the day before collection.
Yesterday,
// Sample playlist that was updated a few days before collection ("n days ago").
Ago,
// The remaining variants each map to one sample playlist; presumably the
// playlist's last update falls in the named month, so that every month name
// appears once per language (TODO confirm against the sample playlists).
Jan,
Feb,
Mar,
Apr,
May,
Jun,
Jul,
Aug,
Sep,
Oct,
Nov,
Dec,
}
/// Collects the localized "last updated" text of a fixed set of sample playlists
/// for every language in `LANGUAGES` and stores the result as pretty-printed JSON
/// in `testfiles/date/playlist_samples.json`.
///
/// This is a one-off data collector: if the sample file already exists the test
/// returns immediately and performs no requests.
///
/// # Panics
///
/// Panics if a playlist cannot be fetched, if a fetched playlist carries no
/// update text, or if the sample file cannot be created or written. The panic
/// message names the playlist ID and the date case that failed.
#[test_log::test(tokio::test)]
async fn collect_dates() {
    // `File::create` and `exists` both accept a borrowed `Path`; no owned `PathBuf` needed.
    let json_path = Path::new("testfiles/date/playlist_samples.json");
    if json_path.exists() {
        return;
    }

    let cases = [
        (
            DateCase::Today,
            "RDCLAK5uy_kj3rhiar1LINmyDcuFnXihEO0K1NQa2jI",
        ),
        (DateCase::Yesterday, "PLmB6td997u3kUOrfFwkULZ910ho44oQSy"),
        (DateCase::Ago, "PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am"),
        (DateCase::Jan, "PL1J-6JOckZtHxTA3hN5SK7gBQaFfKzeXr"),
        (DateCase::Feb, "PL1J-6JOckZtETrbzwZE7mRIIK6BzWNLAs"),
        (DateCase::Mar, "PL1J-6JOckZtG3AVdvBXhMO64mB2k3BtKi"),
        (DateCase::Apr, "PL1J-6JOckZtE_rUpK24S6X5hOE4eQoprN"),
        (DateCase::May, "PL1J-6JOckZtG1ThBxoSLFL-Jg4sa2iX_a"),
        (DateCase::Jun, "PL1J-6JOckZtF_wSzkXBl91pit9d6Fh0QF"),
        (DateCase::Jul, "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"),
        (DateCase::Aug, "PL1J-6JOckZtFFQeWx-ZC0ubpJCEWmGWRx"),
        (DateCase::Sep, "PL1J-6JOckZtHVs0JhBW_qfsW-dtXuM0mQ"),
        (DateCase::Oct, "PL1J-6JOckZtE4g-XgZkL_N0kkoKui5Eys"),
        (DateCase::Nov, "PL1J-6JOckZtEzjMUEyPyPpG836pjeIapw"),
        (DateCase::Dec, "PL1J-6JOckZtHo91uApeb10Qlf2XhkfM-9"),
    ];

    let mut collected_dates = CollectedDates::new();

    for lang in LANGUAGES {
        let rp = RustyTube::new_with_ua(lang, Country::Us, None);
        let mut map: BTreeMap<DateCase, String> = BTreeMap::new();

        for (case, pl_id) in cases {
            // Many requests are made in this loop, so name the failing playlist
            // instead of panicking with a bare `unwrap`.
            let playlist = rp.get_playlist(pl_id).await.unwrap_or_else(|e| {
                panic!("could not fetch playlist {} ({:?}): {:?}", pl_id, case, e)
            });
            let last_update_txt = playlist.last_update_txt.unwrap_or_else(|| {
                panic!("playlist {} ({:?}) has no last update text", pl_id, case)
            });
            map.insert(case, last_update_txt);
        }

        collected_dates.insert(lang, map);
    }

    // The serializer issues many small writes; buffer them and flush explicitly
    // so that a write error is not silently dropped when the writer goes out of scope.
    let mut writer = std::io::BufWriter::new(File::create(json_path).unwrap());
    serde_json::to_writer_pretty(&mut writer, &collected_dates).unwrap();
    std::io::Write::flush(&mut writer).unwrap();
}

View file

@ -1,3 +1,4 @@
#![cfg(test)]
mod collect_playlist_dates;
mod gen_dictionary;
mod gen_locales;

View file

@ -10,9 +10,9 @@ mod cache;
mod deobfuscate;
mod dictionary;
mod serializer;
mod timeago;
mod util;
pub mod client;
pub mod download;
pub mod model;
pub mod timeago;

View file

@ -1,4 +1,4 @@
mod locale;
pub mod locale;
mod ordering;
pub mod stream_filter;
@ -34,6 +34,7 @@ pub struct Playlist {
pub description: Option<String>,
pub channel: Option<Channel>,
pub last_update: Option<DateTime<Utc>>,
pub last_update_txt: Option<String>,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]

View file

@ -92,7 +92,7 @@ pub fn parse(lang: Language, textual_date: &str) -> Option<TimeAgo> {
.flatten()
})
}
_ => filtered_str.split(' ').find_map(|word| {
_ => filtered_str.split_whitespace().find_map(|word| {
mappings
.get(word)
.map(|t| match t.unit {
@ -111,8 +111,24 @@ pub fn parse(lang: Language, textual_date: &str) -> Option<TimeAgo> {
mod tests {
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
use rstest::rstest;
use super::*;
/// Checks that `parse` turns a localized relative date into the expected `TimeAgo`.
///
/// Each `#[case]` supplies the language, the input text and the expected result.
#[rstest]
#[case(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
#[case(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
// No-break space
#[case(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
fn t_parse(
    #[case] lang: Language,
    #[case] textual_date: &str,
    #[case] expect: Option<TimeAgo>,
) {
    // The parser yields an optional `TimeAgo`, not a number of seconds.
    let parsed_time_ago = parse(lang, textual_date);

    assert_eq!(parsed_time_ago, expect);
}
#[test]
fn t_testfile() {
let json_path = Path::new("testfiles/date/timeago_samples.json");

File diff suppressed because it is too large Load diff