feat(codegen): collected video duration samples

This commit is contained in:
ThetaDev 2023-05-06 21:12:49 +02:00
parent 19781eab36
commit 800073df48
14 changed files with 7325 additions and 1449 deletions

View file

@ -19,5 +19,7 @@ phf_codegen = "0.11.1"
once_cell = "1.12.0"
regex = "1.7.1"
indicatif = "0.17.0"
num_enum = "0.5.7"
num_enum = "0.6.1"
path_macro = "1.0.0"
intl_pluralrules = "7.0.2"
unic-langid = "0.9.1"

View file

@ -1,4 +1,4 @@
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
use std::{collections::BTreeMap, fs::File, io::BufReader};
use futures::stream::{self, StreamExt};
use path_macro::path;
@ -9,10 +9,13 @@ use rustypipe::{
};
use serde::Deserialize;
use crate::util::{self, QBrowse, TextRuns};
use crate::{
model::{QBrowse, TextRuns},
util::{self, DICT_DIR},
};
pub async fn collect_album_types(project_root: &Path, concurrency: usize) {
let json_path = path!(project_root / "testfiles" / "dict" / "album_type_samples.json");
pub async fn collect_album_types(concurrency: usize) {
let json_path = path!(*DICT_DIR / "album_type_samples.json");
let album_types = [
(AlbumType::Album, "MPREb_nlBWQROfvjo"),
@ -48,13 +51,13 @@ pub async fn collect_album_types(project_root: &Path, concurrency: usize) {
serde_json::to_writer_pretty(file, &collected_album_types).unwrap();
}
pub fn write_samples_to_dict(project_root: &Path) {
let json_path = path!(project_root / "testfiles" / "dict" / "album_type_samples.json");
pub fn write_samples_to_dict() {
let json_path = path!(*DICT_DIR / "album_type_samples.json");
let json_file = File::open(json_path).unwrap();
let collected: BTreeMap<Language, BTreeMap<AlbumType, String>> =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let mut dict = util::read_dict(project_root);
let mut dict = util::read_dict();
let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();
for lang in langs {
@ -72,7 +75,7 @@ pub fn write_samples_to_dict(project_root: &Path) {
});
}
util::write_dict(project_root, dict);
util::write_dict(dict);
}
#[derive(Debug, Deserialize)]

View file

@ -3,7 +3,6 @@ use std::{
collections::{BTreeMap, HashMap, HashSet},
fs::File,
io::BufReader,
path::Path,
};
use anyhow::{Context, Result};
@ -14,9 +13,13 @@ use regex::Regex;
use rustypipe::client::{ClientType, RustyPipe, RustyPipeQuery};
use rustypipe::param::{locale::LANGUAGES, Language};
use serde::Deserialize;
use serde_with::{serde_as, DefaultOnError, VecSkipError};
use crate::util::{self, QBrowse, QCont, Text, TextRuns};
use crate::model::{Channel, ContinuationResponse};
use crate::util::DICT_DIR;
use crate::{
model::{QBrowse, QCont, TextRuns},
util,
};
type CollectedNumbers = BTreeMap<Language, BTreeMap<String, u64>>;
@ -34,8 +37,8 @@ type CollectedNumbers = BTreeMap<Language, BTreeMap<String, u64>>;
/// We extract these instead of subscriber counts because the YouTube API
/// outputs view counts both in approximated and exact format, so we can use
/// the exact counts to figure out the tokens.
pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
let json_path = path!(project_root / "testfiles" / "dict" / "large_number_samples_all.json");
pub async fn collect_large_numbers(concurrency: usize) {
let json_path = path!(*DICT_DIR / "large_number_samples_all.json");
let rp = RustyPipe::new();
let channels = [
@ -137,13 +140,13 @@ pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
/// Attempt to parse the numbers collected by `collect-large-numbers`
/// and write the results to `dictionary.json`.
pub fn write_samples_to_dict(project_root: &Path) {
let json_path = path!(project_root / "testfiles" / "dict" / "large_number_samples.json");
pub fn write_samples_to_dict() {
let json_path = path!(*DICT_DIR / "large_number_samples.json");
let json_file = File::open(json_path).unwrap();
let collected_nums: CollectedNumbers =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let mut dict = util::read_dict(project_root);
let mut dict = util::read_dict();
let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();
static POINT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\d(\.|,)\d{1,3}(?:\D|$)").unwrap());
@ -292,164 +295,13 @@ pub fn write_samples_to_dict(project_root: &Path) {
}
}
util::write_dict(project_root, dict);
util::write_dict(dict);
}
/// Returns the decimal order of magnitude of `n`, i.e. `floor(log10(n))`.
///
/// `0` has no logarithm and is mapped to magnitude `0`, which matches what the
/// former floating-point implementation produced through the saturating
/// `as u8` cast.
///
/// The computation uses integer division only. Converting to `f64` first
/// rounds values above 2^53, so e.g. `999_999_999_999_999_999` was rounded up
/// to `1e18` and reported as magnitude 18 instead of 17.
fn get_mag(n: u64) -> u8 {
    let mut mag = 0;
    let mut rest = n;
    // A u64 has at most 20 decimal digits, so this loop runs at most 19 times.
    while rest >= 10 {
        rest /= 10;
        mag += 1;
    }
    mag
}
/*
YouTube channel videos response
*/
/// Top-level channel response. Only the `contents` and `header` keys are
/// modelled; other keys in the JSON are ignored by serde.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Channel {
/// Page body (`contents` key).
contents: Contents,
/// Channel header (`header` key).
header: ChannelHeader,
}
/// Wrapper object around the channel header renderer.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ChannelHeader {
/// Read from the `c4TabbedHeaderRenderer` key.
c4_tabbed_header_renderer: HeaderRenderer,
}
/// Channel header data; only the subscriber count text is modelled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct HeaderRenderer {
/// Read from the `subscriberCountText` key.
subscriber_count_text: Text,
}
/// Wrapper object around the tab list of a channel page.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Contents {
/// Read from the `twoColumnBrowseResultsRenderer` key.
two_column_browse_results_renderer: TabsRenderer,
}
/// List of tabs of a channel page.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct TabsRenderer {
/// Tabs that could be deserialized. `VecSkipError` drops entries that fail
/// to deserialize instead of failing the whole response, so this list may
/// be shorter than the one in the JSON (or empty).
#[serde_as(as = "VecSkipError<_>")]
tabs: Vec<TabRendererWrap>,
}
/// Wrapper object around a single tab.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct TabRendererWrap {
/// Read from the `tabRenderer` key.
tab_renderer: TabRenderer,
}
/// A single tab; only its `content` is modelled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct TabRenderer {
/// Tab content (`content` key), expected to hold a rich grid.
content: RichGridRendererWrap,
}
/// Wrapper object around the rich grid of a tab.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct RichGridRendererWrap {
/// Read from the `richGridRenderer` key.
rich_grid_renderer: RichGridRenderer,
}
/// Grid of items shown in a tab, with an optional header.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct RichGridRenderer {
/// Grid items (`contents` key). Entries that fail to deserialize are
/// skipped (`VecSkipError`).
#[serde_as(as = "VecSkipError<_>")]
contents: Vec<RichItemRendererWrap>,
/// Grid header (`header` key). `None` when the key is missing
/// (`serde(default)`) or when its value fails to deserialize
/// (`DefaultOnError`).
#[serde(default)]
#[serde_as(as = "DefaultOnError")]
header: Option<RichGridHeader>,
}
/// Wrapper object around a single grid item.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct RichItemRendererWrap {
/// Read from the `richItemRenderer` key.
rich_item_renderer: RichItemRenderer,
}
/// A single grid item; only its `content` is modelled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct RichItemRenderer {
/// Item content (`content` key), expected to hold a video.
content: VideoRendererWrap,
}
/// Wrapper object around a video entry.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct VideoRendererWrap {
/// Read from the `videoRenderer` key.
video_renderer: VideoRenderer,
}
/// Video entry; only the two view count texts are modelled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct VideoRenderer {
/// Exact view count text (`viewCountText` key), e.g. `24,194 views`
view_count_text: Text,
/// Approximated view count text (`shortViewCountText` key), e.g. `19K views`
short_view_count_text: Text,
}
/// Header of a rich grid, wrapping the filter chip bar.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct RichGridHeader {
/// Read from the `feedFilterChipBarRenderer` key.
feed_filter_chip_bar_renderer: ChipBar,
}
/// Bar of filter chips shown above a grid.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ChipBar {
/// Chips of the bar (`contents` key). No error skipping is configured here,
/// so every entry has to deserialize successfully.
contents: Vec<Chip>,
}
/// Wrapper object around a single chip.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Chip {
/// Read from the `chipCloudChipRenderer` key.
chip_cloud_chip_renderer: ChipRenderer,
}
/// A single chip; only its navigation endpoint is modelled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ChipRenderer {
/// Read from the `navigationEndpoint` key.
navigation_endpoint: NavigationEndpoint,
}
/// Navigation endpoint of a chip, wrapping a continuation command.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct NavigationEndpoint {
/// Read from the `continuationCommand` key.
continuation_command: ContinuationCommand,
}
/// Continuation command carrying the continuation token.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ContinuationCommand {
/// Continuation token (`token` key).
token: String,
}
/// Response of a continuation request.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ContinuationResponse {
/// Actions of the response (`onResponseReceivedActions` key).
///
/// Error skipping is disabled (the attribute below is commented out), so a
/// single action that fails to deserialize fails the whole response.
// #[serde_as(as = "VecSkipError<_>")]
on_response_received_actions: Vec<ContinuationAction>,
}
/// A single continuation action, wrapping the reloaded items.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ContinuationAction {
/// Read from the `reloadContinuationItemsCommand` key.
reload_continuation_items_command: ContinuationItemsWrap,
}
/// Items delivered by a continuation action.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ContinuationItemsWrap {
/// Grid items (`continuationItems` key). Entries that fail to deserialize
/// are skipped (`VecSkipError`).
#[serde_as(as = "VecSkipError<_>")]
continuation_items: Vec<RichItemRendererWrap>,
}
/*
YouTube Music channel data
*/

View file

@ -3,7 +3,6 @@ use std::{
fs::File,
hash::Hash,
io::BufReader,
path::Path,
};
use futures::{stream, StreamExt};
@ -11,11 +10,10 @@ use path_macro::path;
use rustypipe::{
client::RustyPipe,
param::{locale::LANGUAGES, Language},
timeago::{self, TimeAgo},
};
use serde::{Deserialize, Serialize};
use crate::util;
use crate::util::{self, DICT_DIR};
type CollectedDates = BTreeMap<Language, BTreeMap<DateCase, String>>;
@ -38,8 +36,6 @@ enum DateCase {
Dec,
}
const N_AGO: u8 = 5;
/// Collect 'Playlist updated' dates in every supported language
/// and write them to `testfiles/dict/playlist_samples.json`.
///
@ -64,8 +60,8 @@ const N_AGO: u8 = 5;
///
/// Because the relative dates change with time, the first three playlists
/// have to be checked and, if necessary, replaced before running the program.
pub async fn collect_dates(project_root: &Path, concurrency: usize) {
let json_path = path!(project_root / "testfiles" / "dict" / "playlist_samples.json");
pub async fn collect_dates(concurrency: usize) {
let json_path = path!(*DICT_DIR / "playlist_samples.json");
// These are the sample playlists
let cases = [
@ -115,13 +111,13 @@ pub async fn collect_dates(project_root: &Path, concurrency: usize) {
///
/// The ND (no digit) tokens (today, tomorrow) of some languages cannot be
/// parsed automatically and require manual work.
pub fn write_samples_to_dict(project_root: &Path) {
let json_path = path!(project_root / "testfiles" / "dict" / "playlist_samples.json");
pub fn write_samples_to_dict() {
let json_path = path!(*DICT_DIR / "playlist_samples.json");
let json_file = File::open(json_path).unwrap();
let collected_dates: CollectedDates =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let mut dict = util::read_dict(project_root);
let mut dict = util::read_dict();
let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();
let months = [
@ -200,20 +196,6 @@ pub fn write_samples_to_dict(project_root: &Path) {
parse(datestr_table.get(&DateCase::Jan).unwrap(), 0);
}
// n days ago
{
let datestr = datestr_table.get(&DateCase::Ago).unwrap();
let tago = timeago::parse_timeago(lang, datestr);
assert_eq!(
tago,
Some(TimeAgo {
n: N_AGO,
unit: timeago::TimeUnit::Day
}),
"lang: {lang}, txt: {datestr}"
);
}
// Absolute dates (Jan 3, 2020)
months.iter().enumerate().for_each(|(n, m)| {
let datestr = datestr_table.get(m).unwrap();
@ -291,5 +273,5 @@ pub fn write_samples_to_dict(project_root: &Path) {
dict_entry.date_order = num_order;
}
util::write_dict(project_root, dict);
util::write_dict(dict);
}

View file

@ -0,0 +1,176 @@
use std::{collections::BTreeMap, fs::File};
use anyhow::Result;
use futures::{stream, StreamExt};
use path_macro::path;
use rustypipe::{
client::{ClientType, RustyPipe, RustyPipeQuery},
param::{locale::LANGUAGES, Language},
};
use crate::{
model::{Channel, QBrowse},
util::{self, DICT_DIR},
};
type CollectedDurations = BTreeMap<Language, BTreeMap<String, u32>>;
/// Gather the textual video durations for all supported languages and store
/// them in `testfiles/dict/video_duration_samples.json`.
///
/// YouTube exposes the length of short videos only as text, so parsing it
/// requires sample texts in every language. Regular channel videos serve as
/// the sample source because they carry both the textual duration and the
/// easily parseable "mm:ss" form.
///
/// Up to `concurrency` languages are fetched at the same time. Any request,
/// parsing or file error panics.
pub async fn collect_video_durations(concurrency: usize) {
    let json_path = path!(*DICT_DIR / "video_duration_samples.json");
    let rp = RustyPipe::new();

    let channels = [
        "UCq-Fj5jknLsUf-MWSy4_brA",
        "UCMcS5ITpSohfr8Ppzlo4vKw",
        "UCXuqSBlHAE6Xw-yeJA0Tunw",
    ];

    // One future per language; each walks through all sample channels in order.
    let per_language = stream::iter(LANGUAGES).map(|lang| {
        let query = rp.query().lang(lang);
        async move {
            let mut samples = BTreeMap::new();
            for (idx, channel_id) in channels.iter().enumerate() {
                get_channel_vlengths(&query, channel_id, &mut samples)
                    .await
                    .unwrap();
                println!("collected {lang}-{idx}");
            }

            // Only shorts durations get parsed, so samples of one hour or more are dropped
            samples.retain(|_, secs| *secs < 3600);
            (lang, samples)
        }
    });

    let durations: CollectedDurations = per_language
        .buffer_unordered(concurrency)
        .collect()
        .await;

    let out_file = File::create(json_path).unwrap();
    serde_json::to_writer_pretty(out_file, &durations).unwrap();
}
async fn get_channel_vlengths(
query: &RustyPipeQuery,
channel_id: &str,
map: &mut BTreeMap<String, u32>,
) -> Result<()> {
let resp = query
.raw(
ClientType::Desktop,
"browse",
&QBrowse {
context: query.get_context(ClientType::Desktop, true, None).await,
browse_id: channel_id,
params: Some("EgZ2aWRlb3MYASAAMAE"),
},
)
.await?;
let channel = serde_json::from_str::<Channel>(&resp)?;
let tab = channel
.contents
.two_column_browse_results_renderer
.tabs
.into_iter()
.next()
.unwrap()
.tab_renderer
.content
.rich_grid_renderer;
tab.contents.into_iter().for_each(|c| {
let lt = c.rich_item_renderer.content.video_renderer.length_text;
let duration = util::parse_video_length(&lt.simple_text).unwrap();
map.insert(lt.accessibility.accessibility_data.label, duration);
});
Ok(())
}
/// Plural category of a number, mirroring the variants of
/// `intl_pluralrules::PluralCategory` one to one.
///
/// Derives `Hash`, `Eq` and `Ord`, so values can be stored in sets.
// NOTE(review): presumably a local copy exists because the upstream enum lacks
// some of these derives — confirm.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum PluralCategory {
Zero,
One,
Two,
Few,
Many,
Other,
}
impl From<intl_pluralrules::PluralCategory> for PluralCategory {
fn from(value: intl_pluralrules::PluralCategory) -> Self {
match value {
intl_pluralrules::PluralCategory::ZERO => Self::Zero,
intl_pluralrules::PluralCategory::ONE => Self::One,
intl_pluralrules::PluralCategory::TWO => Self::Two,
intl_pluralrules::PluralCategory::FEW => Self::Few,
intl_pluralrules::PluralCategory::MANY => Self::Many,
intl_pluralrules::PluralCategory::OTHER => Self::Other,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    use std::collections::HashSet;
    use std::io::BufReader;

    use intl_pluralrules::{PluralRuleType, PluralRules};
    use unic_langid::LanguageIdentifier;

    /// Split a duration in seconds into `(minutes, seconds)`.
    fn split_duration(d: u32) -> (u32, u32) {
        (d / 60, d % 60)
    }

    /// Verify that the duration sample set covers all pluralization variants of the languages
    ///
    /// For every language, the cardinal plural categories of the numbers
    /// 1 to 59 are computed. The test fails if one of them is not covered by
    /// the minutes part, or by the seconds part, of any collected sample.
    #[test]
    fn check_video_duration_samples() {
        let json_path = path!(*DICT_DIR / "video_duration_samples.json");
        let json_file = File::open(json_path).unwrap();
        let durations: CollectedDurations =
            serde_json::from_reader(BufReader::new(json_file)).unwrap();

        let mut failed = false;

        for (lang, durations) in durations {
            // Plural rules are looked up by the primary language subtag only
            let ul: LanguageIdentifier =
                lang.to_string().split('-').next().unwrap().parse().unwrap();
            let pr = PluralRules::create(ul, PluralRuleType::CARDINAL).expect(&lang.to_string());

            // Categories that still need a sample; covered ones get removed below
            let mut plurals_m: HashSet<PluralCategory> = HashSet::new();
            for n in 1..60 {
                plurals_m.insert(pr.select(n).unwrap().into());
            }
            let mut plurals_s = plurals_m.clone();

            durations.values().for_each(|v| {
                let (m, s) = split_duration(*v);
                plurals_m.remove(&pr.select(m).unwrap().into());
                plurals_s.remove(&pr.select(s).unwrap().into());
            });

            if !plurals_m.is_empty() {
                println!("{lang}: missing minutes {plurals_m:?}");
                failed = true;
            }
            if !plurals_s.is_empty() {
                // Report the uncovered *seconds* categories (previously printed `plurals_m`)
                println!("{lang}: missing seconds {plurals_s:?}");
                failed = true;
            }
        }

        assert!(!failed);
    }
}

View file

@ -5,6 +5,7 @@ use std::{
sync::Mutex,
};
use path_macro::path;
use rustypipe::{
client::{ClientType, RustyPipe},
param::{
@ -14,55 +15,54 @@ use rustypipe::{
report::{Report, Reporter},
};
pub async fn download_testfiles(project_root: &Path) {
let mut testfiles = project_root.to_path_buf();
testfiles.push("testfiles");
use crate::util::TESTFILES_DIR;
player(&testfiles).await;
player_model(&testfiles).await;
playlist(&testfiles).await;
playlist_cont(&testfiles).await;
video_details(&testfiles).await;
comments_top(&testfiles).await;
comments_latest(&testfiles).await;
recommendations(&testfiles).await;
channel_videos(&testfiles).await;
channel_shorts(&testfiles).await;
channel_livestreams(&testfiles).await;
channel_playlists(&testfiles).await;
channel_info(&testfiles).await;
channel_videos_cont(&testfiles).await;
channel_playlists_cont(&testfiles).await;
search(&testfiles).await;
search_cont(&testfiles).await;
search_playlists(&testfiles).await;
search_empty(&testfiles).await;
startpage(&testfiles).await;
startpage_cont(&testfiles).await;
trending(&testfiles).await;
pub async fn download_testfiles() {
player().await;
player_model().await;
playlist().await;
playlist_cont().await;
video_details().await;
comments_top().await;
comments_latest().await;
recommendations().await;
channel_videos().await;
channel_shorts().await;
channel_livestreams().await;
channel_playlists().await;
channel_info().await;
channel_videos_cont().await;
channel_playlists_cont().await;
search().await;
search_cont().await;
search_playlists().await;
search_empty().await;
startpage().await;
startpage_cont().await;
trending().await;
music_playlist(&testfiles).await;
music_playlist_cont(&testfiles).await;
music_playlist_related(&testfiles).await;
music_album(&testfiles).await;
music_search(&testfiles).await;
music_search_tracks(&testfiles).await;
music_search_albums(&testfiles).await;
music_search_artists(&testfiles).await;
music_search_playlists(&testfiles).await;
music_search_cont(&testfiles).await;
music_search_suggestion(&testfiles).await;
music_artist(&testfiles).await;
music_details(&testfiles).await;
music_lyrics(&testfiles).await;
music_related(&testfiles).await;
music_radio(&testfiles).await;
music_radio_cont(&testfiles).await;
music_new_albums(&testfiles).await;
music_new_videos(&testfiles).await;
music_charts(&testfiles).await;
music_genres(&testfiles).await;
music_genre(&testfiles).await;
music_playlist().await;
music_playlist_cont().await;
music_playlist_related().await;
music_album().await;
music_search().await;
music_search_tracks().await;
music_search_albums().await;
music_search_artists().await;
music_search_playlists().await;
music_search_cont().await;
music_search_suggestion().await;
music_artist().await;
music_details().await;
music_lyrics().await;
music_related().await;
music_radio().await;
music_radio_cont().await;
music_new_albums().await;
music_new_videos().await;
music_charts().await;
music_genres().await;
music_genre().await;
}
const CLIENT_TYPES: [ClientType; 5] = [
@ -136,14 +136,12 @@ fn rp_testfile(json_path: &Path) -> RustyPipe {
.build()
}
async fn player(testfiles: &Path) {
async fn player() {
let video_id = "pPvd8UxmSbQ";
for client_type in CLIENT_TYPES {
let mut json_path = testfiles.to_path_buf();
json_path.push("player");
json_path.push(format!("{client_type:?}_video.json").to_lowercase());
let json_path =
path!(*TESTFILES_DIR / "player" / format!("{client_type:?}_video.json").to_lowercase());
if json_path.exists() {
continue;
}
@ -156,14 +154,12 @@ async fn player(testfiles: &Path) {
}
}
async fn player_model(testfiles: &Path) {
async fn player_model() {
let rp = RustyPipe::builder().strict().build();
for (name, id) in [("multilanguage", "tVWWp1PqDus"), ("hdr", "LXb3EKWsInQ")] {
let mut json_path = testfiles.to_path_buf();
json_path.push("player_model");
json_path.push(format!("{name}.json").to_lowercase());
let json_path =
path!(*TESTFILES_DIR / "player_model" / format!("{name}.json").to_lowercase());
if json_path.exists() {
continue;
}
@ -180,15 +176,13 @@ async fn player_model(testfiles: &Path) {
}
}
async fn playlist(testfiles: &Path) {
async fn playlist() {
for (name, id) in [
("short", "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"),
("long", "PL5dDx681T4bR7ZF1IuWzOv1omlRbE7PiJ"),
("nomusic", "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("playlist");
json_path.push(format!("playlist_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "playlist" / format!("playlist_{name}.json"));
if json_path.exists() {
continue;
}
@ -198,10 +192,8 @@ async fn playlist(testfiles: &Path) {
}
}
async fn playlist_cont(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("playlist");
json_path.push("playlist_cont.json");
async fn playlist_cont() {
let json_path = path!(*TESTFILES_DIR / "playlist" / "playlist_cont.json");
if json_path.exists() {
return;
}
@ -217,7 +209,7 @@ async fn playlist_cont(testfiles: &Path) {
playlist.videos.next(rp.query()).await.unwrap().unwrap();
}
async fn video_details(testfiles: &Path) {
async fn video_details() {
for (name, id) in [
("music", "XuM2onMGvTI"),
("mv", "ZeerrnuLi5E"),
@ -226,9 +218,8 @@ async fn video_details(testfiles: &Path) {
("live", "86YLFOog4GM"),
("agegate", "HRKu0cvrr_o"),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("video_details");
json_path.push(format!("video_details_{name}.json"));
let json_path =
path!(*TESTFILES_DIR / "video_details" / format!("video_details_{name}.json"));
if json_path.exists() {
continue;
}
@ -238,10 +229,8 @@ async fn video_details(testfiles: &Path) {
}
}
async fn comments_top(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("video_details");
json_path.push("comments_top.json");
async fn comments_top() {
let json_path = path!(*TESTFILES_DIR / "video_details" / "comments_top.json");
if json_path.exists() {
return;
}
@ -258,10 +247,8 @@ async fn comments_top(testfiles: &Path) {
.unwrap();
}
async fn comments_latest(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("video_details");
json_path.push("comments_latest.json");
async fn comments_latest() {
let json_path = path!(*TESTFILES_DIR / "video_details" / "comments_latest.json");
if json_path.exists() {
return;
}
@ -278,10 +265,8 @@ async fn comments_latest(testfiles: &Path) {
.unwrap();
}
async fn recommendations(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("video_details");
json_path.push("recommendations.json");
async fn recommendations() {
let json_path = path!(*TESTFILES_DIR / "video_details" / "recommendations.json");
if json_path.exists() {
return;
}
@ -293,7 +278,7 @@ async fn recommendations(testfiles: &Path) {
details.recommended.next(rp.query()).await.unwrap();
}
async fn channel_videos(testfiles: &Path) {
async fn channel_videos() {
for (name, id) in [
("base", "UC2DjFE7Xf11URZqWBigcVOQ"),
("music", "UC_vmjW5e1xEHhYjY2a0kK1A"), // YouTube Music channels have no videos
@ -302,9 +287,7 @@ async fn channel_videos(testfiles: &Path) {
("empty", "UCxBa895m48H5idw5li7h-0g"),
("upcoming", "UCcvfHa-GHSOHFAjU0-Ie57A"),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("channel");
json_path.push(format!("channel_videos_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "channel" / format!("channel_videos_{name}.json"));
if json_path.exists() {
continue;
}
@ -314,10 +297,8 @@ async fn channel_videos(testfiles: &Path) {
}
}
async fn channel_shorts(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("channel");
json_path.push("channel_shorts.json");
async fn channel_shorts() {
let json_path = path!(*TESTFILES_DIR / "channel" / "channel_shorts.json");
if json_path.exists() {
return;
}
@ -329,10 +310,8 @@ async fn channel_shorts(testfiles: &Path) {
.unwrap();
}
async fn channel_livestreams(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("channel");
json_path.push("channel_livestreams.json");
async fn channel_livestreams() {
let json_path = path!(*TESTFILES_DIR / "channel" / "channel_livestreams.json");
if json_path.exists() {
return;
}
@ -344,10 +323,8 @@ async fn channel_livestreams(testfiles: &Path) {
.unwrap();
}
async fn channel_playlists(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("channel");
json_path.push("channel_playlists.json");
async fn channel_playlists() {
let json_path = path!(*TESTFILES_DIR / "channel" / "channel_playlists.json");
if json_path.exists() {
return;
}
@ -359,10 +336,8 @@ async fn channel_playlists(testfiles: &Path) {
.unwrap();
}
async fn channel_info(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("channel");
json_path.push("channel_info.json");
async fn channel_info() {
let json_path = path!(*TESTFILES_DIR / "channel" / "channel_info.json");
if json_path.exists() {
return;
}
@ -374,10 +349,8 @@ async fn channel_info(testfiles: &Path) {
.unwrap();
}
async fn channel_videos_cont(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("channel");
json_path.push("channel_videos_cont.json");
async fn channel_videos_cont() {
let json_path = path!(*TESTFILES_DIR / "channel" / "channel_videos_cont.json");
if json_path.exists() {
return;
}
@ -393,10 +366,8 @@ async fn channel_videos_cont(testfiles: &Path) {
videos.content.next(rp.query()).await.unwrap().unwrap();
}
async fn channel_playlists_cont(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("channel");
json_path.push("channel_playlists_cont.json");
async fn channel_playlists_cont() {
let json_path = path!(*TESTFILES_DIR / "channel" / "channel_playlists_cont.json");
if json_path.exists() {
return;
}
@ -412,10 +383,8 @@ async fn channel_playlists_cont(testfiles: &Path) {
playlists.content.next(rp.query()).await.unwrap().unwrap();
}
async fn search(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("search");
json_path.push("default.json");
async fn search() {
let json_path = path!(*TESTFILES_DIR / "search" / "default.json");
if json_path.exists() {
return;
}
@ -424,10 +393,8 @@ async fn search(testfiles: &Path) {
rp.query().search("doobydoobap").await.unwrap();
}
async fn search_cont(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("search");
json_path.push("cont.json");
async fn search_cont() {
let json_path = path!(*TESTFILES_DIR / "search" / "cont.json");
if json_path.exists() {
return;
}
@ -439,10 +406,8 @@ async fn search_cont(testfiles: &Path) {
search.items.next(rp.query()).await.unwrap().unwrap();
}
async fn search_playlists(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("search");
json_path.push("playlists.json");
async fn search_playlists() {
let json_path = path!(*TESTFILES_DIR / "search" / "playlists.json");
if json_path.exists() {
return;
}
@ -454,10 +419,8 @@ async fn search_playlists(testfiles: &Path) {
.unwrap();
}
async fn search_empty(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("search");
json_path.push("empty.json");
async fn search_empty() {
let json_path = path!(*TESTFILES_DIR / "search" / "empty.json");
if json_path.exists() {
return;
}
@ -474,10 +437,8 @@ async fn search_empty(testfiles: &Path) {
.unwrap();
}
async fn startpage(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("trends");
json_path.push("startpage.json");
async fn startpage() {
let json_path = path!(*TESTFILES_DIR / "trends" / "startpage.json");
if json_path.exists() {
return;
}
@ -486,10 +447,8 @@ async fn startpage(testfiles: &Path) {
rp.query().startpage().await.unwrap();
}
async fn startpage_cont(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("trends");
json_path.push("startpage_cont.json");
async fn startpage_cont() {
let json_path = path!(*TESTFILES_DIR / "trends" / "startpage_cont.json");
if json_path.exists() {
return;
}
@ -501,10 +460,8 @@ async fn startpage_cont(testfiles: &Path) {
startpage.next(rp.query()).await.unwrap();
}
async fn trending(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("trends");
json_path.push("trending.json");
async fn trending() {
let json_path = path!(*TESTFILES_DIR / "trends" / "trending_videos.json");
if json_path.exists() {
return;
}
@ -513,15 +470,13 @@ async fn trending(testfiles: &Path) {
rp.query().trending().await.unwrap();
}
async fn music_playlist(testfiles: &Path) {
async fn music_playlist() {
for (name, id) in [
("short", "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"),
("long", "PL5dDx681T4bR7ZF1IuWzOv1omlRbE7PiJ"),
("nomusic", "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_playlist");
json_path.push(format!("playlist_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_playlist" / format!("playlist_{name}.json"));
if json_path.exists() {
continue;
}
@ -531,10 +486,8 @@ async fn music_playlist(testfiles: &Path) {
}
}
async fn music_playlist_cont(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_playlist");
json_path.push("playlist_cont.json");
async fn music_playlist_cont() {
let json_path = path!(*TESTFILES_DIR / "music_playlist" / "playlist_cont.json");
if json_path.exists() {
return;
}
@ -550,10 +503,8 @@ async fn music_playlist_cont(testfiles: &Path) {
playlist.tracks.next(rp.query()).await.unwrap().unwrap();
}
async fn music_playlist_related(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_playlist");
json_path.push("playlist_related.json");
async fn music_playlist_related() {
let json_path = path!(*TESTFILES_DIR / "music_playlist" / "playlist_related.json");
if json_path.exists() {
return;
}
@ -574,7 +525,7 @@ async fn music_playlist_related(testfiles: &Path) {
.unwrap();
}
async fn music_album(testfiles: &Path) {
async fn music_album() {
for (name, id) in [
("one_artist", "MPREb_nlBWQROfvjo"),
("various_artists", "MPREb_8QkDeEIawvX"),
@ -582,9 +533,7 @@ async fn music_album(testfiles: &Path) {
("description", "MPREb_PiyfuVl6aYd"),
("unavailable", "MPREb_AzuWg8qAVVl"),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_playlist");
json_path.push(format!("album_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_playlist" / format!("album_{name}.json"));
if json_path.exists() {
continue;
}
@ -594,16 +543,14 @@ async fn music_album(testfiles: &Path) {
}
}
async fn music_search(testfiles: &Path) {
async fn music_search() {
for (name, query) in [
("default", "black mamba"),
("typo", "liblingsmensch"),
("radio", "pop radio"),
("artist", "taylor swift"),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_search");
json_path.push(format!("main_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_search" / format!("main_{name}.json"));
if json_path.exists() {
continue;
}
@ -613,7 +560,7 @@ async fn music_search(testfiles: &Path) {
}
}
async fn music_search_tracks(testfiles: &Path) {
async fn music_search_tracks() {
for (name, query, videos) in [
("default", "black mamba", false),
("videos", "black mamba", true),
@ -624,9 +571,7 @@ async fn music_search_tracks(testfiles: &Path) {
false,
),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_search");
json_path.push(format!("tracks_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_search" / format!("tracks_{name}.json"));
if json_path.exists() {
continue;
}
@ -640,10 +585,8 @@ async fn music_search_tracks(testfiles: &Path) {
}
}
async fn music_search_albums(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_search");
json_path.push("albums.json");
async fn music_search_albums() {
let json_path = path!(*TESTFILES_DIR / "music_search" / "albums.json");
if json_path.exists() {
return;
}
@ -652,10 +595,8 @@ async fn music_search_albums(testfiles: &Path) {
rp.query().music_search_albums("black mamba").await.unwrap();
}
async fn music_search_artists(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_search");
json_path.push("artists.json");
async fn music_search_artists() {
let json_path = path!(*TESTFILES_DIR / "music_search" / "artists.json");
if json_path.exists() {
return;
}
@ -667,11 +608,9 @@ async fn music_search_artists(testfiles: &Path) {
.unwrap();
}
async fn music_search_playlists(testfiles: &Path) {
async fn music_search_playlists() {
for (name, community) in [("ytm", false), ("community", true)] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_search");
json_path.push(format!("playlists_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_search" / format!("playlists_{name}.json"));
if json_path.exists() {
continue;
}
@ -684,10 +623,8 @@ async fn music_search_playlists(testfiles: &Path) {
}
}
async fn music_search_cont(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_search");
json_path.push("tracks_cont.json");
async fn music_search_cont() {
let json_path = path!(*TESTFILES_DIR / "music_search" / "tracks_cont.json");
if json_path.exists() {
return;
}
@ -699,11 +636,9 @@ async fn music_search_cont(testfiles: &Path) {
res.items.next(rp.query()).await.unwrap().unwrap();
}
async fn music_search_suggestion(testfiles: &Path) {
async fn music_search_suggestion() {
for (name, query) in [("default", "t"), ("empty", "reujbhevmfndxnjrze")] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_search");
json_path.push(format!("suggestion_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_search" / format!("suggestion_{name}.json"));
if json_path.exists() {
continue;
}
@ -713,7 +648,7 @@ async fn music_search_suggestion(testfiles: &Path) {
}
}
async fn music_artist(testfiles: &Path) {
async fn music_artist() {
for (name, id, all_albums) in [
("default", "UClmXPfaYhXOYsNn_QUyheWQ", true),
("no_more_albums", "UC_vmjW5e1xEHhYjY2a0kK1A", true),
@ -722,9 +657,7 @@ async fn music_artist(testfiles: &Path) {
("only_more_singles", "UC0aXrjVxG5pZr99v77wZdPQ", true),
("secondary_channel", "UCC9192yGQD25eBZgFZ84MPw", false),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_artist");
json_path.push(format!("artist_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_artist" / format!("artist_{name}.json"));
if json_path.exists() {
continue;
}
@ -734,11 +667,9 @@ async fn music_artist(testfiles: &Path) {
}
}
async fn music_details(testfiles: &Path) {
async fn music_details() {
for (name, id) in [("mv", "ZeerrnuLi5E"), ("track", "7nigXQS1Xb0")] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_details");
json_path.push(format!("details_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_details" / format!("details_{name}.json"));
if json_path.exists() {
continue;
}
@ -748,10 +679,8 @@ async fn music_details(testfiles: &Path) {
}
}
async fn music_lyrics(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_details");
json_path.push("lyrics.json");
async fn music_lyrics() {
let json_path = path!(*TESTFILES_DIR / "music_details" / "lyrics.json");
if json_path.exists() {
return;
}
@ -766,10 +695,8 @@ async fn music_lyrics(testfiles: &Path) {
.unwrap();
}
async fn music_related(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_details");
json_path.push("related.json");
async fn music_related() {
let json_path = path!(*TESTFILES_DIR / "music_details" / "related.json");
if json_path.exists() {
return;
}
@ -784,11 +711,9 @@ async fn music_related(testfiles: &Path) {
.unwrap();
}
async fn music_radio(testfiles: &Path) {
async fn music_radio() {
for (name, id) in [("mv", "RDAMVMZeerrnuLi5E"), ("track", "RDAMVM7nigXQS1Xb0")] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_details");
json_path.push(format!("radio_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_details" / format!("radio_{name}.json"));
if json_path.exists() {
continue;
}
@ -798,10 +723,8 @@ async fn music_radio(testfiles: &Path) {
}
}
async fn music_radio_cont(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_details");
json_path.push("radio_cont.json");
async fn music_radio_cont() {
let json_path = path!(*TESTFILES_DIR / "music_details" / "radio_cont.json");
if json_path.exists() {
return;
}
@ -813,10 +736,8 @@ async fn music_radio_cont(testfiles: &Path) {
res.next(rp.query()).await.unwrap().unwrap();
}
async fn music_new_albums(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_new");
json_path.push("albums_default.json");
async fn music_new_albums() {
let json_path = path!(*TESTFILES_DIR / "music_new" / "albums_default.json");
if json_path.exists() {
return;
}
@ -825,10 +746,8 @@ async fn music_new_albums(testfiles: &Path) {
rp.query().music_new_albums().await.unwrap();
}
async fn music_new_videos(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_new");
json_path.push("videos_default.json");
async fn music_new_videos() {
let json_path = path!(*TESTFILES_DIR / "music_new" / "videos_default.json");
if json_path.exists() {
return;
}
@ -837,11 +756,9 @@ async fn music_new_videos(testfiles: &Path) {
rp.query().music_new_videos().await.unwrap();
}
async fn music_charts(testfiles: &Path) {
async fn music_charts() {
for (name, country) in [("global", Some(Country::Zz)), ("US", Some(Country::Us))] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_charts");
json_path.push(&format!("charts_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_charts" / format!("charts_{name}.json"));
if json_path.exists() {
continue;
}
@ -851,10 +768,8 @@ async fn music_charts(testfiles: &Path) {
}
}
async fn music_genres(testfiles: &Path) {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_genres");
json_path.push("genres.json");
async fn music_genres() {
let json_path = path!(*TESTFILES_DIR / "music_genres" / "genres.json");
if json_path.exists() {
return;
}
@ -863,14 +778,12 @@ async fn music_genres(testfiles: &Path) {
rp.query().music_genres().await.unwrap();
}
async fn music_genre(testfiles: &Path) {
async fn music_genre() {
for (name, id) in [
("default", "ggMPOg1uX1lMbVZmbzl6NlJ3"),
("mood", "ggMPOg1uX1JOQWZFeDByc2Jm"),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_genres");
json_path.push(&format!("genre_{name}.json"));
let json_path = path!(*TESTFILES_DIR / "music_genres" / format!("genre_{name}.json"));
if json_path.exists() {
continue;
}

View file

@ -1,13 +1,13 @@
use std::fmt::Write;
use std::path::Path;
use once_cell::sync::Lazy;
use path_macro::path;
use regex::Regex;
use rustypipe::timeago::TimeUnit;
use crate::util;
const TARGET_PATH: &str = "src/util/dictionary.rs";
use crate::{
model::TimeUnit,
util::{self, SRC_DIR},
};
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());
@ -30,8 +30,8 @@ fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
}
}
pub fn generate_dictionary(project_root: &Path) {
let dict = util::read_dict(project_root);
pub fn generate_dictionary() {
let dict = util::read_dict();
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
// See codegen/gen_dictionary.rs for the generation code.
@ -169,7 +169,6 @@ pub(crate) fn entry(lang: Language) -> Entry {
let code = format!("{code_head}\n{code_timeago_tokens}");
let mut target_path = project_root.to_path_buf();
target_path.push(TARGET_PATH);
let target_path = path!(*SRC_DIR / "util" / "dictionary.rs");
std::fs::write(target_path, code).unwrap();
}

View file

@ -1,14 +1,15 @@
use std::collections::BTreeMap;
use std::fmt::Write;
use std::path::Path;
use path_macro::path;
use reqwest::header;
use reqwest::Client;
use serde::Deserialize;
use serde_with::serde_as;
use serde_with::VecSkipError;
use crate::util::Text;
use crate::model::Text;
use crate::util::SRC_DIR;
#[serde_as]
#[derive(Clone, Debug, Deserialize)]
@ -137,7 +138,7 @@ struct LanguageCountryCommand {
hl: String,
}
pub async fn generate_locales(project_root: &Path) {
pub async fn generate_locales() {
let (languages, countries) = get_locales().await;
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
@ -288,8 +289,7 @@ pub enum Country {
"{code_head}\n{code_langs}\n{code_countries}\n{code_lang_array}\n{code_country_array}\n{code_lang_names}\n{code_country_names}\n{code_foot}"
);
let mut target_path = project_root.to_path_buf();
target_path.push("src/param/locale.rs");
let target_path = path!(*SRC_DIR / "param" / "locale.rs");
std::fs::write(target_path, code).unwrap();
}

View file

@ -2,21 +2,19 @@ mod abtest;
mod collect_album_types;
mod collect_large_numbers;
mod collect_playlist_dates;
mod collect_video_durations;
mod download_testfiles;
mod gen_dictionary;
mod gen_locales;
mod model;
mod util;
use std::path::PathBuf;
use clap::{Parser, Subcommand};
#[derive(Parser)]
struct Cli {
#[clap(subcommand)]
command: Commands,
#[clap(short = 'd', default_value = "..")]
project_root: PathBuf,
#[clap(short, default_value = "8")]
concurrency: usize,
}
@ -26,6 +24,7 @@ enum Commands {
CollectPlaylistDates,
CollectLargeNumbers,
CollectAlbumTypes,
CollectVideoDurations,
ParsePlaylistDates,
ParseLargeNumbers,
ParseAlbumTypes,
@ -47,28 +46,25 @@ async fn main() {
match cli.command {
Commands::CollectPlaylistDates => {
collect_playlist_dates::collect_dates(&cli.project_root, cli.concurrency).await;
collect_playlist_dates::collect_dates(cli.concurrency).await;
}
Commands::CollectLargeNumbers => {
collect_large_numbers::collect_large_numbers(&cli.project_root, cli.concurrency).await;
collect_large_numbers::collect_large_numbers(cli.concurrency).await;
}
Commands::CollectAlbumTypes => {
collect_album_types::collect_album_types(&cli.project_root, cli.concurrency).await;
collect_album_types::collect_album_types(cli.concurrency).await;
}
Commands::ParsePlaylistDates => {
collect_playlist_dates::write_samples_to_dict(&cli.project_root)
Commands::CollectVideoDurations => {
collect_video_durations::collect_video_durations(cli.concurrency).await;
}
Commands::ParseLargeNumbers => {
collect_large_numbers::write_samples_to_dict(&cli.project_root)
}
Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(&cli.project_root),
Commands::ParsePlaylistDates => collect_playlist_dates::write_samples_to_dict(),
Commands::ParseLargeNumbers => collect_large_numbers::write_samples_to_dict(),
Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(),
Commands::GenLocales => {
gen_locales::generate_locales(&cli.project_root).await;
}
Commands::GenDict => gen_dictionary::generate_dictionary(&cli.project_root),
Commands::DownloadTestfiles => {
download_testfiles::download_testfiles(&cli.project_root).await
gen_locales::generate_locales().await;
}
Commands::GenDict => gen_dictionary::generate_dictionary(),
Commands::DownloadTestfiles => download_testfiles::download_testfiles().await,
Commands::AbTest { id, n } => {
match id {
Some(id) => {

260
codegen/src/model.rs Normal file
View file

@ -0,0 +1,260 @@
use std::collections::BTreeMap;
use rustypipe::{client::YTContext, model::AlbumType, param::Language};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DefaultOnError, VecSkipError};
/// Per-language dictionary entry holding the tokens and formatting rules
/// used to parse localized text (dates, numbers, album types).
///
/// Because of `#[serde(default)]`, any field missing from the serialized
/// form is filled with its `Default` value during deserialization.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct DictEntry {
/// List of languages that should be treated equally (e.g. EnUs/EnGb/EnIn)
pub equivalent: Vec<Language>,
/// Should the language be parsed by character instead of by word?
/// (e.g. Chinese/Japanese)
pub by_char: bool,
/// Tokens for parsing timeago strings.
///
/// Format: Parsed token -> \[Quantity\] Identifier
///
/// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
/// `h`(our), `m`(inute), `s`(econd)
pub timeago_tokens: BTreeMap<String, String>,
/// Order in which to parse numeric date components. Formatted as
/// a string of date identifiers (Y, M, D).
///
/// Examples:
///
/// - 03.01.2020 => `"DMY"`
/// - Jan 3, 2020 => `"DY"`
pub date_order: String,
/// Tokens for parsing month names.
///
/// Format: Parsed token -> Month number (starting from 1)
pub months: BTreeMap<String, u8>,
/// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
///
/// Format: Parsed token -> \[Quantity\] Identifier
pub timeago_nd_tokens: BTreeMap<String, String>,
/// Are commas (instead of points) used as decimal separators?
pub comma_decimal: bool,
/// Tokens for parsing decimal prefixes (K, M, B, ...)
///
/// Format: Parsed token -> decimal power
pub number_tokens: BTreeMap<String, u8>,
/// Tokens for parsing number strings with no digits (e.g. "No videos")
///
/// Format: Parsed token -> value
pub number_nd_tokens: BTreeMap<String, u8>,
/// Names of album types (Album, Single, ...)
///
/// Format: Parsed text -> Album type
pub album_types: BTreeMap<String, AlbumType>,
}
/// Parsed time unit
///
/// Variants are declared from the shortest unit (`Second`) to the longest
/// (`Year`), so the derived `PartialOrd`/`Ord` sorts shorter units first.
/// With serde the variants are written/read as lowercase names
/// (e.g. `"second"`, `"year"`).
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "lowercase")]
pub enum TimeUnit {
Second,
Minute,
Hour,
Day,
Week,
Month,
Year,
}
/// Serializable browse query: a client context, a browse ID and optional
/// parameters. Keys are emitted in camelCase (e.g. `browseId`).
///
/// NOTE(review): presumably sent as the request body of a browse call —
/// confirm against the callers.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct QBrowse<'a> {
/// Client context included with the query.
pub context: YTContext<'a>,
/// ID of the item to browse.
pub browse_id: &'a str,
/// Optional parameter string; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub params: Option<&'a str>,
}
/// Serializable continuation query: a client context plus a continuation
/// token. Keys are emitted in camelCase.
///
/// NOTE(review): presumably used to request the next page of a previous
/// response — confirm against the callers.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct QCont<'a> {
/// Client context included with the query.
pub context: YTContext<'a>,
/// Continuation token string.
pub continuation: &'a str,
}
/// Text object made up of a list of [`Text`] runs (deserialized from the
/// `runs` key).
#[derive(Clone, Debug, Deserialize)]
pub struct TextRuns {
/// The individual text pieces, in the order they were deserialized.
pub runs: Vec<Text>,
}
/// A single piece of text.
///
/// When deserializing, the content is accepted under either the `text` key
/// or its alias `simpleText`.
#[derive(Clone, Debug, Deserialize)]
pub struct Text {
/// The text content.
#[serde(alias = "simpleText")]
pub text: String,
}
/// Top-level deserialization model of a channel object, consisting of its
/// `contents` and `header` keys (both required).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Channel {
/// Content section (see [`Contents`]).
pub contents: Contents,
/// Header section (see [`ChannelHeader`]).
pub header: ChannelHeader,
}
/// Header of a [`Channel`]; a wrapper around the `c4TabbedHeaderRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ChannelHeader {
/// The wrapped header renderer.
pub c4_tabbed_header_renderer: HeaderRenderer,
}
/// Header renderer content; only the `subscriberCountText` key is modeled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct HeaderRenderer {
/// Subscriber count as displayed text (not parsed into a number here).
pub subscriber_count_text: Text,
}
/// Content section of a [`Channel`]; a wrapper around the
/// `twoColumnBrowseResultsRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Contents {
/// The wrapped renderer holding the list of tabs.
pub two_column_browse_results_renderer: TabsRenderer,
}
/// Renderer holding a list of tabs.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TabsRenderer {
/// Tabs that could be deserialized; `VecSkipError` drops list elements
/// that fail to deserialize instead of failing the whole list.
#[serde_as(as = "VecSkipError<_>")]
pub tabs: Vec<TabRendererWrap>,
}
/// Wrapper object around the `tabRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TabRendererWrap {
/// The wrapped tab.
pub tab_renderer: TabRenderer,
}
/// A single tab; only its `content` key is modeled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TabRenderer {
/// Tab content, expected to contain a rich grid renderer.
pub content: RichGridRendererWrap,
}
/// Wrapper object around the `richGridRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RichGridRendererWrap {
/// The wrapped rich grid.
pub rich_grid_renderer: RichGridRenderer,
}
/// Rich grid: a list of items plus an optional header.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RichGridRenderer {
/// Grid items; `VecSkipError` drops elements that fail to deserialize
/// (e.g. entries that are not rich item renderers).
#[serde_as(as = "VecSkipError<_>")]
pub contents: Vec<RichItemRendererWrap>,
/// Grid header. `None` when the key is missing (`#[serde(default)]`) or
/// when its content cannot be deserialized (`DefaultOnError`).
#[serde(default)]
#[serde_as(as = "DefaultOnError")]
pub header: Option<RichGridHeader>,
}
/// Wrapper object around the `richItemRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RichItemRendererWrap {
/// The wrapped grid item.
pub rich_item_renderer: RichItemRenderer,
}
/// A single rich grid item; only its `content` key is modeled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RichItemRenderer {
/// Item content, expected to contain a video renderer.
pub content: VideoRendererWrap,
}
/// Wrapper object around the `videoRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct VideoRendererWrap {
/// The wrapped video item.
pub video_renderer: VideoRenderer,
}
/// Video item with its textual view counts and length. All three keys are
/// required for deserialization to succeed.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct VideoRenderer {
/// Full view count text, e.g. `24,194 views`
pub view_count_text: Text,
/// Abbreviated view count text, e.g. `19K views`
pub short_view_count_text: Text,
/// Video length in both its displayed and accessibility form.
pub length_text: LengthText,
}
/// Video length text: the short displayed form together with the spelled-out
/// accessibility label.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct LengthText {
/// Spelled-out length, e.g. `18 minutes, 26 seconds`
pub accessibility: Accessibility,
/// Displayed length, e.g. `18:26` (read from the `simpleText` key)
pub simple_text: String,
}
/// Wrapper object around the `accessibilityData` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Accessibility {
/// The wrapped accessibility data.
pub accessibility_data: AccessibilityData,
}
/// Accessibility data; only the `label` key is modeled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AccessibilityData {
/// Accessibility label text.
pub label: String,
}
/// Header of a rich grid; a wrapper around the `feedFilterChipBarRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RichGridHeader {
/// The wrapped chip bar.
pub feed_filter_chip_bar_renderer: ChipBar,
}
/// Chip bar holding a list of chips.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ChipBar {
/// The chips of the bar. Unlike the `VecSkipError` lists in this module,
/// a single malformed chip makes deserialization of the whole bar fail.
pub contents: Vec<Chip>,
}
/// Wrapper object around the `chipCloudChipRenderer` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Chip {
/// The wrapped chip.
pub chip_cloud_chip_renderer: ChipRenderer,
}
/// A single chip; only its `navigationEndpoint` key is modeled.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ChipRenderer {
/// Endpoint associated with the chip.
pub navigation_endpoint: NavigationEndpoint,
}
/// Navigation endpoint; a wrapper around the `continuationCommand` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NavigationEndpoint {
/// The wrapped continuation command.
pub continuation_command: ContinuationCommand,
}
/// Continuation command carrying a token string.
///
/// NOTE(review): presumably this token is what gets passed as
/// `QCont::continuation` — confirm against the callers.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ContinuationCommand {
/// The continuation token.
pub token: String,
}
/// Top-level deserialization model of a continuation response, consisting of
/// the `onResponseReceivedActions` list.
// NOTE(review): `#[serde_as]` is applied although no field carries a
// `#[serde_as(...)]` attribute, so every action must deserialize
// successfully — confirm whether `VecSkipError` was intended here.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ContinuationResponse {
/// Actions contained in the response.
pub on_response_received_actions: Vec<ContinuationAction>,
}
/// Wrapper object around the `reloadContinuationItemsCommand` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ContinuationAction {
/// The wrapped command holding the continuation items.
pub reload_continuation_items_command: ContinuationItemsWrap,
}
/// List of items delivered by a continuation.
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ContinuationItemsWrap {
/// Items that could be deserialized as rich item renderers;
/// `VecSkipError` drops elements that fail to deserialize.
#[serde_as(as = "VecSkipError<_>")]
pub continuation_items: Vec<RichItemRendererWrap>,
}

View file

@ -1,120 +1,50 @@
use std::{
collections::BTreeMap,
fs::File,
io::BufReader,
path::{Path, PathBuf},
str::FromStr,
};
use std::{collections::BTreeMap, fs::File, io::BufReader, path::PathBuf, str::FromStr};
use once_cell::sync::Lazy;
use path_macro::path;
use rustypipe::{client::YTContext, model::AlbumType, param::Language};
use regex::Regex;
use rustypipe::param::Language;
use serde::{Deserialize, Serialize};
static DICT_PATH: Lazy<PathBuf> = Lazy::new(|| path!("testfiles" / "dict" / "dictionary.json"));
static DICT_OVERRIDE_PATH: Lazy<PathBuf> =
Lazy::new(|| path!("testfiles" / "dict" / "dictionary_override.json"));
use crate::model::DictEntry;
/// Canonicalized path of the `testfiles` directory.
///
/// The location is `../testfiles` relative to this crate's manifest
/// directory (`CARGO_MANIFEST_DIR`, fixed at compile time) and is resolved
/// lazily on first access.
///
/// # Panics
///
/// The first access panics if the path cannot be canonicalized
/// (for example because the directory does not exist).
pub static TESTFILES_DIR: Lazy<PathBuf> = Lazy::new(|| {
    let relative = path!(env!("CARGO_MANIFEST_DIR") / ".." / "testfiles");
    relative.canonicalize().unwrap()
});
/// Path of the `dict` directory, located directly inside [`TESTFILES_DIR`].
///
/// Built lazily on first access; forces initialization of
/// [`TESTFILES_DIR`] and therefore inherits its panic behavior.
pub static DICT_DIR: Lazy<PathBuf> = Lazy::new(|| path!(*TESTFILES_DIR / "dict"));
/// Path of the `src` directory, i.e. `../src` relative to this crate's
/// manifest directory (`CARGO_MANIFEST_DIR`, fixed at compile time).
///
/// Unlike [`TESTFILES_DIR`] this path is not canonicalized, so it still
/// contains the `..` component and its existence is not checked here.
pub static SRC_DIR: Lazy<PathBuf> = Lazy::new(|| path!(env!("CARGO_MANIFEST_DIR") / ".." / "src"));
type Dictionary = BTreeMap<Language, DictEntry>;
type DictionaryOverride = BTreeMap<Language, DictOverrideEntry>;
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct DictEntry {
/// List of languages that should be treated equally (e.g. EnUs/EnGb/EnIn)
pub equivalent: Vec<Language>,
/// Should the language be parsed by character instead of by word?
/// (e.g. Chinese/Japanese)
pub by_char: bool,
/// Tokens for parsing timeago strings.
///
/// Format: Parsed token -> \[Quantity\] Identifier
///
/// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
/// `h`(our), `m`(inute), `s`(econd)
pub timeago_tokens: BTreeMap<String, String>,
/// Order in which to parse numeric date components. Formatted as
/// a string of date identifiers (Y, M, D).
///
/// Examples:
///
/// - 03.01.2020 => `"DMY"`
/// - Jan 3, 2020 => `"DY"`
pub date_order: String,
/// Tokens for parsing month names.
///
/// Format: Parsed token -> Month number (starting from 1)
pub months: BTreeMap<String, u8>,
/// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
///
/// Format: Parsed token -> \[Quantity\] Identifier
pub timeago_nd_tokens: BTreeMap<String, String>,
/// Are commas (instead of points) used as decimal separators?
pub comma_decimal: bool,
/// Tokens for parsing decimal prefixes (K, M, B, ...)
///
/// Format: Parsed token -> decimal power
pub number_tokens: BTreeMap<String, u8>,
/// Tokens for parsing number strings with no digits (e.g. "No videos")
///
/// Format: Parsed token -> value
pub number_nd_tokens: BTreeMap<String, u8>,
/// Names of album types (Album, Single, ...)
///
/// Format: Parsed text -> Album type
pub album_types: BTreeMap<String, AlbumType>,
struct DictOverrideEntry {
number_tokens: BTreeMap<String, Option<u8>>,
number_nd_tokens: BTreeMap<String, Option<u8>>,
}
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct DictOverrideEntry {
pub number_tokens: BTreeMap<String, Option<u8>>,
pub number_nd_tokens: BTreeMap<String, Option<u8>>,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct QBrowse<'a> {
pub context: YTContext<'a>,
pub browse_id: &'a str,
#[serde(skip_serializing_if = "Option::is_none")]
pub params: Option<&'a str>,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct QCont<'a> {
pub context: YTContext<'a>,
pub continuation: &'a str,
}
#[derive(Clone, Debug, Deserialize)]
pub struct TextRuns {
pub runs: Vec<Text>,
}
#[derive(Clone, Debug, Deserialize)]
pub struct Text {
#[serde(alias = "simpleText")]
pub text: String,
}
pub fn read_dict(project_root: &Path) -> Dictionary {
let json_path = path!(project_root / *DICT_PATH);
pub fn read_dict() -> Dictionary {
let json_path = path!(*DICT_DIR / "dictionary.json");
let json_file = File::open(json_path).unwrap();
serde_json::from_reader(BufReader::new(json_file)).unwrap()
}
pub fn read_dict_override(project_root: &Path) -> DictionaryOverride {
let json_path = path!(project_root / *DICT_OVERRIDE_PATH);
fn read_dict_override() -> DictionaryOverride {
let json_path = path!(*DICT_DIR / "dictionary_override.json");
let json_file = File::open(json_path).unwrap();
serde_json::from_reader(BufReader::new(json_file)).unwrap()
}
pub fn write_dict(project_root: &Path, dict: Dictionary) {
let dict_override = read_dict_override(project_root);
pub fn write_dict(dict: Dictionary) {
let dict_override = read_dict_override();
let json_path = path!(project_root / *DICT_PATH);
let json_path = path!(*DICT_DIR / "dictionary.json");
let json_file = File::create(json_path).unwrap();
fn apply_map<K: Clone + Ord, V: Clone>(map: &mut BTreeMap<K, V>, or: &BTreeMap<K, Option<V>>) {
@ -251,3 +181,26 @@ pub fn parse_largenum_en(string: &str) -> Option<u64> {
num.checked_mul((10_u64).checked_pow(exp.try_into().ok()?)?)
}
/// Parse textual video length (e.g. `0:49`, `2:02` or `1:48:18`)
/// and return the duration in seconds.
pub fn parse_video_length(text: &str) -> Option<u32> {
static VIDEO_LENGTH_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})"#).unwrap());
VIDEO_LENGTH_REGEX.captures(text).map(|cap| {
let hrs = cap
.get(1)
.and_then(|x| x.as_str().parse::<u32>().ok())
.unwrap_or_default();
let min = cap
.get(2)
.and_then(|x| x.as_str().parse::<u32>().ok())
.unwrap_or_default();
let sec = cap
.get(3)
.and_then(|x| x.as_str().parse::<u32>().ok())
.unwrap_or_default();
hrs * 3600 + min * 60 + sec
})
}