add playlist date collector
This commit is contained in:
parent
513bf1dc9c
commit
c9433d721d
14 changed files with 20408 additions and 75 deletions
18780
notes/browse/playlist_missing_sidebar.json
Normal file
18780
notes/browse/playlist_missing_sidebar.json
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -31,8 +31,9 @@ Throttling issue: Y8JFxS1HlDo
|
|||
|
||||
Playlist update dates (05.09.2022):
|
||||
today: RDCLAK5uy_kj3rhiar1LINmyDcuFnXihEO0K1NQa2jI
|
||||
yesterday: PL4C44E2875308A280
|
||||
yesterday: PLmB6td997u3kUOrfFwkULZ910ho44oQSy
|
||||
2 days ago: PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am
|
||||
4 days ago: PLFgm_LJYNEttGkVLopr0a25OAySzfS941
|
||||
5 days ago: PL3-sRm8xAzY9sDilvaWjCwCI0TkUzYdOG
|
||||
7 days ago: PLHr0jWPfopte182N54r1ra7tkRJC1fmPu
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use reqwest::Method;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
model::{Channel, Playlist, Thumbnail, Video},
|
||||
serializer::text::{PageType, Text, TextLink},
|
||||
serializer::text::{PageType, TextLink},
|
||||
util,
|
||||
};
|
||||
|
||||
use super::{response, ClientType, ContextYT, RustyTube};
|
||||
|
|
@ -41,7 +42,9 @@ impl RustyTube {
|
|||
.await?
|
||||
.error_for_status()?;
|
||||
|
||||
let playlist_response = resp.json::<response::Playlist>().await?;
|
||||
let resp_body = resp.text().await?;
|
||||
let playlist_response =
|
||||
serde_json::from_str::<response::Playlist>(&resp_body).context(resp_body)?;
|
||||
|
||||
map_playlist(&playlist_response)
|
||||
}
|
||||
|
|
@ -120,55 +123,54 @@ fn map_playlist(response: &response::Playlist) -> Result<Playlist> {
|
|||
|
||||
let (videos, ctoken) = map_playlist_items(video_items);
|
||||
|
||||
let thumbnail_renderer = some_or_bail!(
|
||||
response
|
||||
.sidebar
|
||||
.playlist_sidebar_renderer
|
||||
.items
|
||||
.iter()
|
||||
.find_map(|s| match s {
|
||||
response::playlist::SidebarRendererItem::PlaylistSidebarPrimaryInfoRenderer {
|
||||
thumbnail_renderer,
|
||||
} => Some(thumbnail_renderer),
|
||||
_ => None,
|
||||
}),
|
||||
Err(anyhow!("no primary sidebar"))
|
||||
);
|
||||
let (thumbnails, last_update_txt) = match &response.sidebar {
|
||||
Some(sidebar) => {
|
||||
let primary = some_or_bail!(
|
||||
sidebar.playlist_sidebar_renderer.items.get(0),
|
||||
Err(anyhow!("no primary sidebar"))
|
||||
);
|
||||
|
||||
let video_owner_wrap = response
|
||||
.sidebar
|
||||
.playlist_sidebar_renderer
|
||||
.items
|
||||
.iter()
|
||||
.find_map(|s| match s {
|
||||
response::playlist::SidebarRendererItem::PlaylistSidebarSecondaryInfoRenderer {
|
||||
video_owner,
|
||||
} => Some(video_owner),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let n_videos = match ctoken {
|
||||
Some(_) => {
|
||||
some_or_bail!(
|
||||
match &response.header.playlist_header_renderer.num_videos_text {
|
||||
Text::Multiple { runs } =>
|
||||
if runs.len() == 2 && runs[1] == " videos" {
|
||||
runs[0].replace(",", "").replace(".", "").parse().ok()
|
||||
} else {
|
||||
None
|
||||
},
|
||||
_ => None,
|
||||
},
|
||||
Err(anyhow!("no video count"))
|
||||
(
|
||||
&primary
|
||||
.playlist_sidebar_primary_info_renderer
|
||||
.thumbnail_renderer
|
||||
.playlist_video_thumbnail_renderer
|
||||
.thumbnail
|
||||
.thumbnails,
|
||||
primary
|
||||
.playlist_sidebar_primary_info_renderer
|
||||
.stats
|
||||
.get(2)
|
||||
.map(|t| t.to_owned()),
|
||||
)
|
||||
}
|
||||
None => {
|
||||
let header_banner = some_or_bail!(
|
||||
&response
|
||||
.header
|
||||
.playlist_header_renderer
|
||||
.playlist_header_banner,
|
||||
Err(anyhow!("no thumbnail found"))
|
||||
);
|
||||
|
||||
let last_update_txt = response
|
||||
.header
|
||||
.playlist_header_renderer
|
||||
.byline
|
||||
.get(1)
|
||||
.map(|b| b.playlist_byline_renderer.text.to_owned());
|
||||
|
||||
(
|
||||
&header_banner
|
||||
.hero_playlist_thumbnail_renderer
|
||||
.thumbnail
|
||||
.thumbnails,
|
||||
last_update_txt,
|
||||
)
|
||||
}
|
||||
None => videos.len() as u32,
|
||||
};
|
||||
|
||||
let thumbnails = thumbnail_renderer
|
||||
.playlist_video_thumbnail_renderer
|
||||
.thumbnail
|
||||
.thumbnails
|
||||
let thumbnails = thumbnails
|
||||
.iter()
|
||||
.map(|t| Thumbnail {
|
||||
url: t.url.to_owned(),
|
||||
|
|
@ -177,6 +179,16 @@ fn map_playlist(response: &response::Playlist) -> Result<Playlist> {
|
|||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let n_videos = match ctoken {
|
||||
Some(_) => {
|
||||
ok_or_bail!(
|
||||
util::parse_numeric(&response.header.playlist_header_renderer.num_videos_text),
|
||||
Err(anyhow!("no video count"))
|
||||
)
|
||||
}
|
||||
None => videos.len() as u32,
|
||||
};
|
||||
|
||||
let id = response
|
||||
.header
|
||||
.playlist_header_renderer
|
||||
|
|
@ -189,8 +201,8 @@ fn map_playlist(response: &response::Playlist) -> Result<Playlist> {
|
|||
.description_text
|
||||
.to_owned();
|
||||
|
||||
let channel = match video_owner_wrap {
|
||||
Some(o) => match &o.video_owner_renderer.title {
|
||||
let channel = match &response.header.playlist_header_renderer.owner_text {
|
||||
Some(owner_text) => match owner_text {
|
||||
TextLink::Browse {
|
||||
text,
|
||||
page_type,
|
||||
|
|
@ -217,6 +229,7 @@ fn map_playlist(response: &response::Playlist) -> Result<Playlist> {
|
|||
description,
|
||||
channel,
|
||||
last_update: None,
|
||||
last_update_txt,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,18 +2,16 @@ use serde::Deserialize;
|
|||
use serde_with::serde_as;
|
||||
use serde_with::{json::JsonString, DefaultOnError, VecSkipError};
|
||||
|
||||
use crate::serializer::text::{Text, TextLink};
|
||||
use crate::serializer::text::TextLink;
|
||||
|
||||
use super::{
|
||||
ContentRenderer, ContentsRenderer, Thumbnails, ThumbnailsWrap, VideoListItem, VideoOwner,
|
||||
};
|
||||
use super::{ContentRenderer, ContentsRenderer, Thumbnails, ThumbnailsWrap, VideoListItem};
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Playlist {
|
||||
pub contents: Contents,
|
||||
pub header: Header,
|
||||
pub sidebar: Sidebar,
|
||||
pub sidebar: Option<Sidebar>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
|
|
@ -93,8 +91,35 @@ pub struct HeaderRenderer {
|
|||
#[serde(default)]
|
||||
#[serde_as(as = "DefaultOnError<Option<crate::serializer::text::Text>>")]
|
||||
pub description_text: Option<String>,
|
||||
/// `"495", " videos"`
|
||||
pub num_videos_text: Text,
|
||||
#[serde_as(as = "crate::serializer::text::Text")]
|
||||
pub num_videos_text: String,
|
||||
#[serde_as(as = "Option<crate::serializer::text::TextLink>")]
|
||||
pub owner_text: Option<TextLink>,
|
||||
|
||||
// Alternative layout
|
||||
pub playlist_header_banner: Option<PlaylistHeaderBanner>,
|
||||
#[serde(default)]
|
||||
pub byline: Vec<Byline>,
|
||||
}
|
||||
|
||||
/// Banner of the alternative playlist header layout.
///
/// `map_playlist` reads the playlist thumbnails from here when the response
/// has no sidebar.
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PlaylistHeaderBanner {
|
||||
/// Wrapper around the thumbnail list (JSON key `heroPlaylistThumbnailRenderer`).
pub hero_playlist_thumbnail_renderer: ThumbnailsWrap,
|
||||
}
|
||||
|
||||
/// One entry of the header's `byline` list.
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Byline {
|
||||
/// Renderer holding the text of this entry (JSON key `playlistBylineRenderer`).
pub playlist_byline_renderer: BylineRenderer,
|
||||
}
|
||||
|
||||
/// Content of a single byline entry.
#[serde_as]
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct BylineRenderer {
|
||||
/// Byline text, deserialized through the `Text` serializer into a plain string.
/// `map_playlist` uses the text of the second byline entry as the last-update
/// text when the response has no sidebar.
#[serde_as(as = "crate::serializer::text::Text")]
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
|
|
@ -108,22 +133,25 @@ pub struct Sidebar {
|
|||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SidebarRenderer {
|
||||
#[serde_as(as = "VecSkipError<_>")]
|
||||
pub items: Vec<SidebarRendererItem>,
|
||||
pub items: Vec<SidebarItemPrimary>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum SidebarRendererItem {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
PlaylistSidebarPrimaryInfoRenderer {
|
||||
thumbnail_renderer: PlaylistThumbnailRenderer,
|
||||
// - `"495", " videos"`
|
||||
// - `"3,310,996 views"`
|
||||
// - `"Last updated on ", "Aug 7, 2022"`
|
||||
// stats: Vec<Text>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
PlaylistSidebarSecondaryInfoRenderer { video_owner: VideoOwner },
|
||||
pub struct SidebarItemPrimary {
|
||||
pub playlist_sidebar_primary_info_renderer: SidebarPrimaryInfoRenderer,
|
||||
}
|
||||
|
||||
/// Primary info block of the playlist sidebar: the playlist thumbnail plus a list
/// of textual stats.
#[serde_as]
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SidebarPrimaryInfoRenderer {
|
||||
/// Renderer from which `map_playlist` takes the playlist thumbnails.
pub thumbnail_renderer: PlaylistThumbnailRenderer,
|
||||
// Example entries of `stats`, in order. `map_playlist` reads the third entry
// (index 2) as the last-update text.
// - `"495", " videos"`
|
||||
// - `"3,310,996 views"`
|
||||
// - `"Last updated on ", "Aug 7, 2022"`
|
||||
/// Stat lines, each deserialized through the `Text` serializer into a plain string.
#[serde_as(as = "Vec<crate::serializer::text::Text>")]
|
||||
pub stats: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
|
|
|
|||
|
|
@ -1925,4 +1925,5 @@ channel:
|
|||
id: UCIekuFeMaV78xYfvpmoCnPg
|
||||
name: Best Music
|
||||
last_update: ~
|
||||
last_update_txt: "Last updated on Aug 7, 2022"
|
||||
|
||||
|
|
|
|||
|
|
@ -1279,4 +1279,5 @@ channel:
|
|||
id: UCQM0bS4_04-Y4JuYrgmnpZQ
|
||||
name: Chaosflo44
|
||||
last_update: ~
|
||||
last_update_txt: "Last updated on Jul 2, 2014"
|
||||
|
||||
|
|
|
|||
|
|
@ -1863,4 +1863,5 @@ thumbnails:
|
|||
description: ~
|
||||
channel: ~
|
||||
last_update: ~
|
||||
last_update_txt: Updated today
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ struct QVideoCont {
|
|||
}
|
||||
|
||||
impl RustyTube {
|
||||
pub async fn get_video_response(&self, video_id: &str) -> Result<response::Video> {
|
||||
async fn get_video_response(&self, video_id: &str) -> Result<response::Video> {
|
||||
let client = self.get_ytclient(ClientType::Desktop);
|
||||
let context = client.get_context(true).await;
|
||||
let request_body = QVideo {
|
||||
|
|
@ -43,7 +43,7 @@ impl RustyTube {
|
|||
Ok(resp.json::<response::Video>().await?)
|
||||
}
|
||||
|
||||
pub async fn get_comments_response(&self, ctoken: &str) -> Result<response::VideoComments> {
|
||||
async fn get_comments_response(&self, ctoken: &str) -> Result<response::VideoComments> {
|
||||
let client = self.get_ytclient(ClientType::Desktop);
|
||||
let context = client.get_context(true).await;
|
||||
let request_body = QVideoCont {
|
||||
|
|
@ -62,7 +62,7 @@ impl RustyTube {
|
|||
Ok(resp.json::<response::VideoComments>().await?)
|
||||
}
|
||||
|
||||
pub async fn get_recommendations_response(
|
||||
async fn get_recommendations_response(
|
||||
&self,
|
||||
ctoken: &str,
|
||||
) -> Result<response::VideoRecommendations> {
|
||||
|
|
|
|||
77
src/codegen/collect_playlist_dates.rs
Normal file
77
src/codegen/collect_playlist_dates.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use std::{collections::BTreeMap, fs::File, path::Path};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
client::RustyTube,
|
||||
model::{locale::LANGUAGES, Country, Language},
|
||||
};
|
||||
|
||||
type CollectedDates = BTreeMap<Language, BTreeMap<DateCase, String>>;
|
||||
|
||||
/// Kind of playlist "last updated" text that is sampled once per language.
///
/// Used as the key of the inner `BTreeMap` in `CollectedDates`, so the variant
/// order determines the order of the entries in the collected map.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
||||
enum DateCase {
|
||||
/// Sample playlist that was updated today.
Today,
|
||||
/// Sample playlist that was updated yesterday.
Yesterday,
|
||||
/// Sample playlist that was updated a few days ago ("2 days ago" in the notes).
Ago,
|
||||
// Month cases: one sample playlist per month.
// NOTE(review): presumably each playlist was last updated in the named month - confirm.
Jan,
|
||||
Feb,
|
||||
Mar,
|
||||
Apr,
|
||||
May,
|
||||
Jun,
|
||||
Jul,
|
||||
Aug,
|
||||
Sep,
|
||||
Oct,
|
||||
Nov,
|
||||
Dec,
|
||||
}
|
||||
|
||||
#[test_log::test(tokio::test)]
|
||||
async fn collect_dates() {
|
||||
let json_path = Path::new("testfiles/date/playlist_samples.json").to_path_buf();
|
||||
if json_path.exists() {
|
||||
return;
|
||||
}
|
||||
|
||||
let cases = [
|
||||
(
|
||||
DateCase::Today,
|
||||
"RDCLAK5uy_kj3rhiar1LINmyDcuFnXihEO0K1NQa2jI",
|
||||
),
|
||||
(DateCase::Yesterday, "PLmB6td997u3kUOrfFwkULZ910ho44oQSy"),
|
||||
(DateCase::Ago, "PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am"),
|
||||
(DateCase::Jan, "PL1J-6JOckZtHxTA3hN5SK7gBQaFfKzeXr"),
|
||||
(DateCase::Feb, "PL1J-6JOckZtETrbzwZE7mRIIK6BzWNLAs"),
|
||||
(DateCase::Mar, "PL1J-6JOckZtG3AVdvBXhMO64mB2k3BtKi"),
|
||||
(DateCase::Apr, "PL1J-6JOckZtE_rUpK24S6X5hOE4eQoprN"),
|
||||
(DateCase::May, "PL1J-6JOckZtG1ThBxoSLFL-Jg4sa2iX_a"),
|
||||
(DateCase::Jun, "PL1J-6JOckZtF_wSzkXBl91pit9d6Fh0QF"),
|
||||
(DateCase::Jul, "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"),
|
||||
(DateCase::Aug, "PL1J-6JOckZtFFQeWx-ZC0ubpJCEWmGWRx"),
|
||||
(DateCase::Sep, "PL1J-6JOckZtHVs0JhBW_qfsW-dtXuM0mQ"),
|
||||
(DateCase::Oct, "PL1J-6JOckZtE4g-XgZkL_N0kkoKui5Eys"),
|
||||
(DateCase::Nov, "PL1J-6JOckZtEzjMUEyPyPpG836pjeIapw"),
|
||||
(DateCase::Dec, "PL1J-6JOckZtHo91uApeb10Qlf2XhkfM-9"),
|
||||
];
|
||||
|
||||
let mut collected_dates = CollectedDates::new();
|
||||
|
||||
for lang in LANGUAGES {
|
||||
let rp = RustyTube::new_with_ua(lang, Country::Us, None);
|
||||
let mut map: BTreeMap<DateCase, String> = BTreeMap::new();
|
||||
|
||||
for (case, pl_id) in cases {
|
||||
let playlist = rp.get_playlist(pl_id).await.unwrap();
|
||||
map.insert(case, playlist.last_update_txt.unwrap());
|
||||
}
|
||||
|
||||
collected_dates.insert(lang, map);
|
||||
}
|
||||
|
||||
let file = File::create(json_path).unwrap();
|
||||
serde_json::to_writer_pretty(file, &collected_dates).unwrap();
|
||||
}
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
#![cfg(test)]
|
||||
mod collect_playlist_dates;
|
||||
mod gen_dictionary;
|
||||
mod gen_locales;
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ mod cache;
|
|||
mod deobfuscate;
|
||||
mod dictionary;
|
||||
mod serializer;
|
||||
mod timeago;
|
||||
mod util;
|
||||
|
||||
pub mod client;
|
||||
pub mod download;
|
||||
pub mod model;
|
||||
pub mod timeago;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
mod locale;
|
||||
pub mod locale;
|
||||
mod ordering;
|
||||
pub mod stream_filter;
|
||||
|
||||
|
|
@ -34,6 +34,7 @@ pub struct Playlist {
|
|||
pub description: Option<String>,
|
||||
pub channel: Option<Channel>,
|
||||
pub last_update: Option<DateTime<Utc>>,
|
||||
pub last_update_txt: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ pub fn parse(lang: Language, textual_date: &str) -> Option<TimeAgo> {
|
|||
.flatten()
|
||||
})
|
||||
}
|
||||
_ => filtered_str.split(' ').find_map(|word| {
|
||||
_ => filtered_str.split_whitespace().find_map(|word| {
|
||||
mappings
|
||||
.get(word)
|
||||
.map(|t| match t.unit {
|
||||
|
|
@ -111,8 +111,24 @@ pub fn parse(lang: Language, textual_date: &str) -> Option<TimeAgo> {
|
|||
mod tests {
|
||||
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
|
||||
|
||||
use rstest::rstest;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// Checks that `parse` returns the expected `TimeAgo` for a textual date in the
/// given language.
#[rstest]
|
||||
#[case(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
|
||||
#[case(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
|
||||
// No-break space (U+00A0) between the number and the unit
|
||||
#[case(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
|
||||
fn t_parse(
|
||||
#[case] lang: Language,
|
||||
#[case] textual_date: &str,
|
||||
#[case] expect: Option<TimeAgo>,
|
||||
) {
|
||||
// Parse the localized text and compare the whole `Option<TimeAgo>` with the expectation.
let secs_ago = parse(lang, textual_date);
|
||||
assert_eq!(secs_ago, expect);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn t_testfile() {
|
||||
let json_path = Path::new("testfiles/date/timeago_samples.json");
|
||||
|
|
|
|||
1413
testfiles/date/playlist_samples.json
Normal file
1413
testfiles/date/playlist_samples.json
Normal file
File diff suppressed because it is too large
Load diff
Reference in a new issue