From d6cfc7e914fdc88a4fb19632bc325ec9afb4218e Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Fri, 16 Sep 2022 01:37:02 +0200 Subject: [PATCH] move codegen to separate crate --- Cargo.toml | 3 +- codegen/Cargo.toml | 20 +++ .../src}/collect_playlist_dates.rs | 115 +++++++++------ codegen/src/download_testfiles.rs | 132 ++++++++++++++++++ .../codegen => codegen/src}/gen_dictionary.rs | 44 +++--- {src/codegen => codegen/src}/gen_locales.rs | 91 ++++++------ codegen/src/main.rs | 50 +++++++ codegen/src/util.rs | 72 ++++++++++ src/client/mod.rs | 2 +- src/client/player.rs | 50 +------ src/client/playlist.rs | 23 +-- src/codegen/mod.rs | 35 ----- src/lib.rs | 6 +- src/report.rs | 21 --- src/timeago.rs | 2 +- 15 files changed, 423 insertions(+), 243 deletions(-) create mode 100644 codegen/Cargo.toml rename {src/codegen => codegen/src}/collect_playlist_dates.rs (70%) create mode 100644 codegen/src/download_testfiles.rs rename {src/codegen => codegen/src}/gen_dictionary.rs (71%) rename {src/codegen => codegen/src}/gen_locales.rs (78%) create mode 100644 codegen/src/main.rs create mode 100644 codegen/src/util.rs delete mode 100644 src/codegen/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 2e4e6f5..1914cfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2021" [workspace] -members = [".", "cli"] +members = [".", "codegen", "cli"] [features] default = ["default-tls", "yaml"] @@ -48,4 +48,3 @@ rstest = "0.15.0" temp_testdir = "0.2.3" insta = {version = "1.17.1", features = ["yaml", "redactions"]} velcro = "0.5.3" -phf_codegen = "0.11.1" diff --git a/codegen/Cargo.toml b/codegen/Cargo.toml new file mode 100644 index 0000000..ccda07c --- /dev/null +++ b/codegen/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "rustypipe-codegen" +version = "0.1.0" +edition = "2021" + +[dependencies] +rustypipe = {path = "../"} +reqwest = "0.11.11" +tokio = {version = "1.20.0", features = ["macros", "rt-multi-thread"]} +futures = "0.3.21" +serde = { version = 
"1.0", features = ["derive"] } +serde_json = "1.0.82" +serde_with = "2.0.0" +anyhow = "1.0" +log = "0.4.17" +env_logger = "0.9.0" +clap = { version = "3.2.16", features = ["derive"] } +phf_codegen = "0.11.1" +once_cell = "1.12.0" +fancy-regex = "0.10.0" diff --git a/src/codegen/collect_playlist_dates.rs b/codegen/src/collect_playlist_dates.rs similarity index 70% rename from src/codegen/collect_playlist_dates.rs rename to codegen/src/collect_playlist_dates.rs index fd071c9..593d13b 100644 --- a/src/codegen/collect_playlist_dates.rs +++ b/codegen/src/collect_playlist_dates.rs @@ -1,5 +1,3 @@ -#![cfg(test)] - use std::{ collections::{BTreeMap, HashMap}, fs::File, @@ -8,14 +6,15 @@ use std::{ path::Path, }; +use futures::{stream, StreamExt}; +use rustypipe::{ + client::RustyPipe, + model::{locale::LANGUAGES, Language}, + timeago::{self, TimeAgo}, +}; use serde::{Deserialize, Serialize}; -use crate::{ - client::RustyTube, - model::{locale::LANGUAGES, Country, Language}, - timeago::{self, TimeAgo}, - util, -}; +use crate::util; type CollectedDates = BTreeMap>; @@ -38,20 +37,40 @@ enum DateCase { Dec, } -// #[test_log::test(tokio::test)] -async fn collect_dates() { - let json_path = Path::new("testfiles/date/playlist_samples.json").to_path_buf(); - if json_path.exists() { - return; - } +/// Collect 'Playlist updated' dates in every supported language +/// and write them to `testfiles/date/playlist_samples.json`. +/// +/// YouTube's API outputs the update date of playlists only in a +/// textual format (e.g. *Last updated on Jan 3, 2020*), which varies +/// by language. +/// +/// For recently updated playlists YouTube shows 'today', 'yesterday' +/// and 'x<=7 days ago' instead of the literal date. +/// +/// To parse these dates correctly we need to collect a sample set +/// in every language. +/// +/// This set includes +/// - one playlist updated today +/// - one playlist updated yesterday +/// - one playlist updated 2-7 days ago +/// - one playlist from every month. 
Note that there should not +/// be any dates which include the same number twice (e.g. 01.01.2020). +/// +/// Because the relative dates change with time, the first three playlists +/// should be checked and eventually changed before running the program. +pub async fn collect_dates(project_root: &Path, concurrency: usize) { + let mut json_path = project_root.to_path_buf(); + json_path.push("testfiles/date/playlist_samples.json"); + // These are the sample playlists let cases = [ ( DateCase::Today, "RDCLAK5uy_kj3rhiar1LINmyDcuFnXihEO0K1NQa2jI", ), - (DateCase::Yesterday, "PLmB6td997u3kUOrfFwkULZ910ho44oQSy"), - (DateCase::Ago, "PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am"), + (DateCase::Yesterday, "PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am"), + (DateCase::Ago, "PLmB6td997u3kUOrfFwkULZ910ho44oQSy"), (DateCase::Jan, "PL1J-6JOckZtFjcni6Xj1pLYglJp6JCpKD"), (DateCase::Feb, "PL1J-6JOckZtETrbzwZE7mRIIK6BzWNLAs"), (DateCase::Mar, "PL1J-6JOckZtG3AVdvBXhMO64mB2k3BtKi"), @@ -66,31 +85,42 @@ async fn collect_dates() { (DateCase::Dec, "PL1J-6JOckZtHo91uApeb10Qlf2XhkfM-9"), ]; - let mut collected_dates = CollectedDates::new(); + let rp = RustyPipe::default(); + let collected_dates = stream::iter(LANGUAGES) + .map(|lang| { + let rp = rp.clone(); + async move { + let mut map: BTreeMap = BTreeMap::new(); - for lang in LANGUAGES { - let rp = RustyTube::new_with_ua(lang, Country::Us, None); - let mut map: BTreeMap = BTreeMap::new(); + for (case, pl_id) in cases { + let playlist = rp.query().lang(lang).get_playlist(pl_id).await.unwrap(); + map.insert(case, playlist.last_update_txt.unwrap()); + } - for (case, pl_id) in cases { - let playlist = rp.get_playlist(pl_id).await.unwrap(); - map.insert(case, playlist.last_update_txt.unwrap()); - } - - collected_dates.insert(lang, map); - } + (lang, map) + } + }) + .buffer_unordered(concurrency) + .collect::>() + .await; let file = File::create(json_path).unwrap(); serde_json::to_writer_pretty(file, &collected_dates).unwrap(); } -// #[test] -fn 
write_samples_to_dict() { - let json_path = Path::new("testfiles/date/playlist_samples.json").to_path_buf(); +/// Attempt to parse the dates collected by `collect-playlist-dates` +/// and write the results to `dictionary.json`. +/// +/// The ND (no digit) tokens (today, yesterday) of some languages cannot be +/// parsed automatically and require manual work. +pub fn write_samples_to_dict(project_root: &Path) { + let mut json_path = project_root.to_path_buf(); + json_path.push("testfiles/date/playlist_samples.json"); + let json_file = File::open(json_path).unwrap(); let collected_dates: CollectedDates = serde_json::from_reader(BufReader::new(json_file)).unwrap(); - let mut dict = super::read_dict(); + let mut dict = util::read_dict(project_root); let langs = dict.keys().map(|k| k.to_owned()).collect::>(); let months = [ @@ -134,7 +164,9 @@ fn write_samples_to_dict() { let dict_entry = dict.entry(lang).or_default(); let mut num_order = "".to_owned(); - let collect_nd_tokens = match lang { + let collect_nd_tokens = !matches!( + lang, + // ND tokens of these languages must be edited manually Language::Ja | Language::ZhCn | Language::ZhHk @@ -146,10 +178,9 @@ fn write_samples_to_dict() { | Language::Uz | Language::Te | Language::PtPt - // Singhalese YT translation is broken (today == tomorrow) - | Language::Si => false, - _ => true, - }; + // Singhalese YT translation has an error (today == tomorrow) + | Language::Si + ); dict_entry.months = BTreeMap::new(); @@ -164,7 +195,7 @@ fn write_samples_to_dict() { // Today/Yesterday { let mut parse = |string: &str, n: i8| { - timeago::filter_str(string) + util::filter_datestr(string) .split_whitespace() .for_each(|word| { td_words @@ -183,7 +214,7 @@ fn write_samples_to_dict() { // n days ago { let datestr = datestr_table.get(&DateCase::Ago).unwrap(); - let tago = timeago::parse_timeago(lang, &datestr); + let tago = timeago::parse_timeago(lang, datestr); assert_eq!( tago, Some(TimeAgo { @@ -201,7 +232,7 @@ fn
write_samples_to_dict() { let datestr = datestr_table.get(m).unwrap(); // Get order of numbers - let nums = util::parse_numeric_vec::(&datestr); + let nums = util::parse_numeric_vec::(datestr); let date = dates[n]; let this_num_order = nums @@ -219,14 +250,14 @@ fn write_samples_to_dict() { }) .collect::(); - if num_order == "" { + if num_order.is_empty() { num_order = this_num_order; } else { assert_eq!(this_num_order, num_order, "lang: {}", lang); } // Insert words into the map - timeago::filter_str(&datestr) + util::filter_datestr(datestr) .split_whitespace() .for_each(|word| { month_words @@ -275,5 +306,5 @@ fn write_samples_to_dict() { dict_entry.date_order = num_order; } - super::write_dict(&dict); + util::write_dict(project_root, &dict); } diff --git a/codegen/src/download_testfiles.rs b/codegen/src/download_testfiles.rs new file mode 100644 index 0000000..4fbc0f3 --- /dev/null +++ b/codegen/src/download_testfiles.rs @@ -0,0 +1,132 @@ +use std::{ + fs::File, + path::{Path, PathBuf}, +}; + +use rustypipe::{ + cache::FileStorage, + client::{ClientType, RustyPipe}, + report::{Report, Reporter}, +}; + +const CLIENT_TYPES: [ClientType; 5] = [ + ClientType::Desktop, + ClientType::DesktopMusic, + ClientType::TvHtml5Embed, + ClientType::Android, + ClientType::Ios, +]; + +/// Store pretty-printed response json +pub struct TestFileReporter { + path: PathBuf, +} + +impl TestFileReporter { + pub fn new>(path: P) -> Self { + Self { + path: path.as_ref().to_path_buf(), + } + } +} + +impl Reporter for TestFileReporter { + fn report(&self, report: &Report) { + let data = + serde_json::from_str::(&report.http_request.resp_body).unwrap(); + let file = File::create(&self.path).unwrap(); + serde_json::to_writer_pretty(file, &data).unwrap(); + + println!("Downloaded {}", self.path.display()); + } +} + +fn rp_testfile(json_path: &Path) -> RustyPipe { + let reporter = TestFileReporter::new(json_path); + RustyPipe::new( + Some(Box::new(FileStorage::default())), + 
Some(Box::new(reporter)), + None, + ) +} + +pub async fn download_testfiles(project_root: &Path) { + let mut testfiles = project_root.to_path_buf(); + testfiles.push("testfiles"); + + tokio::join!( + player(&testfiles), + player_model(&testfiles), + playlist(&testfiles) + ); +} + +async fn player(testfiles: &Path) { + let video_id = "pPvd8UxmSbQ"; + + for client_type in CLIENT_TYPES { + let mut json_path = testfiles.to_path_buf(); + json_path.push("player"); + json_path.push(format!("{:?}_video.json", client_type).to_lowercase()); + + if json_path.exists() { + continue; + } + + let rp = rp_testfile(&json_path); + rp.query() + .report(true) + .strict(true) + .get_player(video_id, client_type) + .await + .unwrap(); + } +} + +async fn player_model(testfiles: &Path) { + let rp = RustyPipe::default(); + + for (name, id) in [("multilanguage", "tVWWp1PqDus"), ("hdr", "LXb3EKWsInQ")] { + let mut json_path = testfiles.to_path_buf(); + json_path.push("player_model"); + json_path.push(format!("{}.json", name).to_lowercase()); + + if json_path.exists() { + continue; + } + + let player_data = rp + .query() + .strict(true) + .get_player(id, ClientType::Desktop) + .await + .unwrap(); + let file = File::create(&json_path).unwrap(); + serde_json::to_writer_pretty(file, &player_data).unwrap(); + + println!("Downloaded {}", json_path.display()); + } +} + +async fn playlist(testfiles: &Path) { + for (name, id) in [ + ("short", "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"), + ("long", "PL5dDx681T4bR7ZF1IuWzOv1omlRbE7PiJ"), + ("nomusic", "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"), + ] { + let mut json_path = testfiles.to_path_buf(); + json_path.push("playlist"); + json_path.push(format!("playlist_{}.json", name)); + if json_path.exists() { + continue; + } + + let rp = rp_testfile(&json_path); + rp.query() + .report(true) + .strict(true) + .get_playlist(id) + .await + .unwrap(); + } +} diff --git a/src/codegen/gen_dictionary.rs b/codegen/src/gen_dictionary.rs similarity index 71% rename from 
src/codegen/gen_dictionary.rs rename to codegen/src/gen_dictionary.rs index be66a7c..ae86c0f 100644 --- a/src/codegen/gen_dictionary.rs +++ b/codegen/src/gen_dictionary.rs @@ -1,10 +1,13 @@ -#![cfg(test)] +use std::fmt::Write; +use std::path::Path; -use crate::{timeago::TimeUnit}; use fancy_regex::Regex; use once_cell::sync::Lazy; +use rustypipe::timeago::TimeUnit; -const TARGET_FILE: &str = "src/dictionary.rs"; +use crate::util; + +const TARGET_PATH: &str = "src/dictionary.rs"; fn parse_tu(tu: &str) -> (u8, Option) { static TU_PATTERN: Lazy = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap()); @@ -27,14 +30,13 @@ fn parse_tu(tu: &str) -> (u8, Option) { } } -// #[test] -fn generate_dictionary() { - let dict = super::read_dict(); +pub fn generate_dictionary(project_root: &Path) { + let dict = util::read_dict(project_root); let code_head = r#"// This file is automatically generated. DO NOT EDIT. use crate::{ model::Language, - timeago::{TaToken, TimeUnit, DateCmp}, + timeago::{DateCmp, TaToken, TimeUnit}, }; pub struct Entry { @@ -56,45 +58,45 @@ pub fn entry(lang: Language) -> Entry { // Match selector let mut selector = format!("Language::{:?}", lang); entry.equivalent.iter().for_each(|eq| { - selector += &format!(" | Language::{:?}", eq); + let _ = write!(selector, " | Language::{:?}", eq); }); // Timeago tokens let mut ta_tokens = phf_codegen::Map::<&str>::new(); entry.timeago_tokens.iter().for_each(|(txt, tu_str)| { - let (n, unit) = parse_tu(&tu_str); + let (n, unit) = parse_tu(tu_str); match unit { Some(unit) => ta_tokens.entry( - &txt, + txt, &format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit), ), - None => ta_tokens.entry(&txt, &format!("TaToken {{ n: {}, unit: None }}", n)), + None => ta_tokens.entry(txt, &format!("TaToken {{ n: {}, unit: None }}", n)), }; }); // Months let mut months = phf_codegen::Map::<&str>::new(); entry.months.iter().for_each(|(txt, n_mon)| { - months.entry(&txt, &n_mon.to_string()); + months.entry(txt, 
&n_mon.to_string()); }); // Timeago(ND) tokens let mut ta_nd_tokens = phf_codegen::Map::<&str>::new(); entry.timeago_nd_tokens.iter().for_each(|(txt, tu_str)| { - let (n, unit) = parse_tu(&tu_str); + let (n, unit) = parse_tu(tu_str); match unit { Some(unit) => ta_nd_tokens.entry( - &txt, + txt, &format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit), ), - None => ta_nd_tokens.entry(&txt, &format!("TaToken {{ n: {}, unit: None }}", n)), + None => ta_nd_tokens.entry(txt, &format!("TaToken {{ n: {}, unit: None }}", n)), }; }); // Date order let mut date_order = "&[".to_owned(); entry.date_order.chars().for_each(|c| { - date_order += &format!("DateCmp::{}, ", c); + let _ = write!(date_order, "DateCmp::{}, ", c); }); date_order = date_order.trim_end_matches([' ', ',']).to_owned() + "]"; @@ -102,15 +104,15 @@ pub fn entry(lang: Language) -> Entry { let code_ta_nd_tokens = &ta_nd_tokens.build().to_string().replace('\n', "\n "); let code_months = &months.build().to_string().replace('\n', "\n "); - code_timeago_tokens += &format!( - "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n }},\n ", - selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens - ); + let _ = write!(code_timeago_tokens, "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n }},\n ", + selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens); }); code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n"; let code = format!("{}\n{}", code_head, code_timeago_tokens); - std::fs::write(TARGET_FILE, code).unwrap(); + let mut target_path = project_root.to_path_buf(); + target_path.push(TARGET_PATH); + std::fs::write(target_path, code).unwrap(); } diff --git a/src/codegen/gen_locales.rs b/codegen/src/gen_locales.rs similarity index 78% rename from src/codegen/gen_locales.rs rename to 
codegen/src/gen_locales.rs index 1225a05..55094b6 100644 --- a/src/codegen/gen_locales.rs +++ b/codegen/src/gen_locales.rs @@ -1,20 +1,13 @@ -#![cfg(test)] use std::collections::BTreeMap; +use std::fmt::Write; use std::path::Path; -use reqwest::Method; -use serde::{Deserialize, Serialize}; +use reqwest::header; +use reqwest::Client; +use serde::Deserialize; use serde_with::serde_as; use serde_with::VecSkipError; -use crate::client::{ClientType, ContextYT, RustyTube}; - -#[derive(Clone, Debug, Serialize)] -#[serde(rename_all = "camelCase")] -struct QLanguageMenu { - context: ContextYT, -} - #[serde_as] #[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -122,12 +115,10 @@ struct LanguageItemWrap { compact_link_renderer: LanguageItem, } -#[serde_as] #[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] struct LanguageItem { - #[serde_as(as = "crate::serializer::text::Text")] - title: String, + title: Text, service_endpoint: ServiceEndpoint, } @@ -144,9 +135,13 @@ struct LanguageCountryCommand { hl: String, } -// #[test_log::test(tokio::test)] -#[allow(dead_code)] -async fn generate_locales() { +#[derive(Clone, Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct Text { + simple_text: String, +} + +pub async fn generate_locales(project_root: &Path) { let (languages, countries) = get_locales().await; let code_head = r#"// This file is automatically generated. DO NOT EDIT. 
@@ -186,18 +181,21 @@ impl FromStr for Country { } "#; - let mut code_langs = r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] + let mut code_langs = + r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] #[serde(rename_all = "lowercase")] pub enum Language { "#.to_owned(); - let mut code_countries = r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] + let mut code_countries = + r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] #[serde(rename_all = "UPPERCASE")] pub enum Country { "#.to_owned(); let mut code_lang_array = format!("pub const LANGUAGES: [Language; {}] = [\n", languages.len()); - let mut code_country_array = format!("pub const COUNTRIES: [Country; {}] = [\n", countries.len()); + let mut code_country_array = + format!("pub const COUNTRIES: [Country; {}] = [\n", countries.len()); let mut code_lang_names = r#"impl Language { pub fn name(&self) -> &str { @@ -223,18 +221,22 @@ pub enum Country { .collect::(); // Language enum - code_langs += &format!(" /// {}\n ", n); + let _ = write!(code_langs, " /// {}\n ", n); if c.contains('-') { - code_langs += &format!("#[serde(rename = \"{}\")]\n ", c); + let _ = write!(code_langs, "#[serde(rename = \"{}\")]\n ", c); } code_langs += &enum_name; code_langs += ",\n"; // Language array - code_lang_array += &format!(" Language::{},\n", enum_name); + let _ = writeln!(code_lang_array, " Language::{},", enum_name); // Language names - code_lang_names += &format!(" Language::{} => \"{}\",\n", enum_name, n); + let _ = writeln!( + code_lang_names, + " Language::{} => \"{}\",", + enum_name, n + ); }); code_langs += "}\n"; @@ -242,14 +244,18 @@ pub enum Country { let enum_name = c[0..1].to_owned().to_uppercase() + &c[1..].to_owned().to_lowercase(); // Country enum - code_countries += &format!(" /// {}\n", n); - code_countries += &format!(" {},\n", 
enum_name); + let _ = writeln!(code_countries, " /// {}", n); + let _ = writeln!(code_countries, " {},", enum_name); // Country array - code_country_array += &format!(" Country::{},\n", enum_name); + let _ = writeln!(code_country_array, " Country::{},", enum_name); // Country names - code_country_names += &format!(" Country::{} => \"{}\",\n", enum_name, n); + let _ = writeln!( + code_country_names, + " Country::{} => \"{}\",", + enum_name, n + ); }); code_countries += "}\n"; @@ -267,26 +273,23 @@ pub enum Country { code_country_array, code_lang_names, code_country_names, - code_foot, + code_foot ); - let locale_path = Path::new("src/model/locale.rs"); - std::fs::write(locale_path, code).unwrap(); + let mut target_path = project_root.to_path_buf(); + target_path.push("src/model/locale.rs"); + std::fs::write(target_path, code).unwrap(); } async fn get_locales() -> (BTreeMap, BTreeMap) { - let rt = RustyTube::new(); - let client = rt.get_ytclient(ClientType::Desktop); - let context = client.get_context(true).await; - - let request_body = QLanguageMenu { context }; - + let client = Client::new(); let resp = client - .request_builder(Method::POST, "account/account_menu") - .await - .json(&request_body) - .send() - .await + .post("https://www.youtube.com/youtubei/v1/account/account_menu?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8") + .header(header::CONTENT_TYPE, "application/json") + .body( + r##"{"context":{"client":{"clientName":"WEB","clientVersion":"2.20220914.06.00","platform":"DESKTOP","originalUrl":"https://www.youtube.com/","hl":"en","gl":"US"},"request":{"internalExperimentFlags":[],"useSsl":true},"user":{"lockedSafetyMode":false}}}"## + ) + .send().await .unwrap() .error_for_status() .unwrap(); @@ -344,8 +347,8 @@ fn map_language_section(section: &CompactLinkRendererWrap) -> BTreeMap>() + .collect() } diff --git a/codegen/src/main.rs b/codegen/src/main.rs new file mode 100644 index 0000000..fc1efe0 --- /dev/null +++ b/codegen/src/main.rs @@ -0,0 +1,50 @@ +mod 
collect_playlist_dates; +mod download_testfiles; +mod gen_dictionary; +mod gen_locales; +mod util; + +use std::path::PathBuf; + +use clap::{Parser, Subcommand}; + +#[derive(Parser)] +struct Cli { + #[clap(subcommand)] + command: Commands, + #[clap(short = 'd', default_value = "..")] + project_root: PathBuf, + #[clap(short, default_value = "8")] + concurrency: usize, +} + +#[derive(Subcommand)] +enum Commands { + CollectPlaylistDates, + WritePlaylistDates, + GenLocales, + GenDict, + DownloadTestfiles, +} + +#[tokio::main] +async fn main() { + env_logger::init(); + let cli = Cli::parse(); + + match cli.command { + Commands::CollectPlaylistDates => { + collect_playlist_dates::collect_dates(&cli.project_root, cli.concurrency).await; + } + Commands::WritePlaylistDates => { + collect_playlist_dates::write_samples_to_dict(&cli.project_root); + } + Commands::GenLocales => { + gen_locales::generate_locales(&cli.project_root).await; + } + Commands::GenDict => gen_dictionary::generate_dictionary(&cli.project_root), + Commands::DownloadTestfiles => { + download_testfiles::download_testfiles(&cli.project_root).await + } + }; +} diff --git a/codegen/src/util.rs b/codegen/src/util.rs new file mode 100644 index 0000000..5925322 --- /dev/null +++ b/codegen/src/util.rs @@ -0,0 +1,72 @@ +use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path, str::FromStr}; + +use rustypipe::model::Language; +use serde::{Deserialize, Serialize}; + +const DICT_PATH: &str = "testfiles/date/dictionary.json"; + +type Dictionary = BTreeMap; + +#[derive(Debug, Default, Serialize, Deserialize)] +#[serde(default)] +pub struct DictEntry { + pub equivalent: Vec, + pub by_char: bool, + pub timeago_tokens: BTreeMap, + pub date_order: String, + pub months: BTreeMap, + pub timeago_nd_tokens: BTreeMap, +} + +pub fn read_dict(project_root: &Path) -> Dictionary { + let mut json_path = project_root.to_path_buf(); + json_path.push(DICT_PATH); + let json_file = File::open(json_path).unwrap(); + 
serde_json::from_reader(BufReader::new(json_file)).unwrap() +} + +pub fn write_dict(project_root: &Path, dict: &Dictionary) { + let mut json_path = project_root.to_path_buf(); + json_path.push(DICT_PATH); + let json_file = File::create(json_path).unwrap(); + serde_json::to_writer_pretty(json_file, dict).unwrap(); +} + +pub fn filter_datestr(string: &str) -> String { + string + .to_lowercase() + .chars() + .filter_map(|c| { + if c == '\u{200b}' || c.is_ascii_digit() { + None + } else if c == '-' { + Some(' ') + } else { + Some(c) + } + }) + .collect() +} + +/// Parse all numbers occurring in a string and return them as a vec +pub fn parse_numeric_vec(string: &str) -> Vec +where + F: FromStr, +{ + let mut numbers = vec![]; + + let mut buf = String::new(); + for c in string.chars() { + if c.is_ascii_digit() { + buf.push(c); + } else if !buf.is_empty() { + buf.parse::().map_or((), |n| numbers.push(n)); + buf.clear(); + } + } + if !buf.is_empty() { + buf.parse::().map_or((), |n| numbers.push(n)); + } + + numbers +} diff --git a/src/client/mod.rs b/src/client/mod.rs index a93007d..62e7885 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -65,7 +65,7 @@ impl ClientType { #[derive(Clone, Debug, Serialize)] #[serde(rename_all = "camelCase")] -pub struct ContextYT { +struct ContextYT { client: ClientInfo, /// only used on desktop #[serde(skip_serializing_if = "Option::is_none")] diff --git a/src/client/player.rs b/src/client/player.rs index de7e38e..3d6dac3 100644 --- a/src/client/player.rs +++ b/src/client/player.rs @@ -563,11 +563,7 @@ fn get_audio_codec(codecs: Vec<&str>) -> AudioCodec { mod tests { use std::{fs::File, io::BufReader, path::Path}; - use crate::{ - client::{RustyPipe, CLIENT_TYPES}, - deobfuscate::DeobfData, - report::TestFileReporter, - }; + use crate::{client::RustyPipe, deobfuscate::DeobfData}; use super::*; use rstest::rstest; @@ -581,50 +577,6 @@ mod tests { }) }); - #[test_log::test(tokio::test)] - async fn download_response_testfiles() { - let
tf_dir = Path::new("testfiles/player"); - let video_id = "pPvd8UxmSbQ"; - - for client_type in CLIENT_TYPES { - let mut json_path = tf_dir.to_path_buf(); - json_path.push(format!("{:?}_video.json", client_type).to_lowercase()); - if json_path.exists() { - continue; - } - - let reporter = TestFileReporter::new(json_path); - let rp = RustyPipe::new(None, Some(Box::new(reporter)), None); - rp.test_query() - .report(true) - .get_player(video_id, client_type) - .await - .unwrap(); - } - } - - #[test_log::test(tokio::test)] - async fn download_model_testfiles() { - let tf_dir = Path::new("testfiles/player_model"); - let rp = RustyPipe::new_test(); - - for (name, id) in [("multilanguage", "tVWWp1PqDus"), ("hdr", "LXb3EKWsInQ")] { - let mut json_path = tf_dir.to_path_buf(); - json_path.push(format!("{}.json", name).to_lowercase()); - if json_path.exists() { - continue; - } - - let player_data = rp - .test_query() - .get_player(id, ClientType::Desktop) - .await - .unwrap(); - let file = File::create(json_path).unwrap(); - serde_json::to_writer_pretty(file, &player_data).unwrap(); - } - } - #[rstest] #[case::desktop("desktop")] #[case::desktop_music("desktopmusic")] diff --git a/src/client/playlist.rs b/src/client/playlist.rs index 631a4e0..e83cf4c 100644 --- a/src/client/playlist.rs +++ b/src/client/playlist.rs @@ -299,7 +299,7 @@ mod tests { use rstest::rstest; - use crate::{client::RustyPipe, report::TestFileReporter}; + use crate::client::RustyPipe; use super::*; @@ -355,27 +355,6 @@ mod tests { assert!(!playlist.thumbnails.is_empty()); } - #[test_log::test(tokio::test)] - async fn download_testfiles() { - let tf_dir = Path::new("testfiles/playlist"); - - for (name, id) in [ - ("short", "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"), - ("long", "PL5dDx681T4bR7ZF1IuWzOv1omlRbE7PiJ"), - ("nomusic", "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"), - ] { - let mut json_path = tf_dir.to_path_buf(); - json_path.push(format!("playlist_{}.json", name)); - if json_path.exists() { - 
continue; - } - - let reporter = TestFileReporter::new(json_path); - let rp = RustyPipe::new(None, Some(Box::new(reporter)), None); - rp.test_query().report(true).get_playlist(id).await.unwrap(); - } - } - #[rstest] #[case::short("short", "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk")] #[case::long("long", "PL5dDx681T4bR7ZF1IuWzOv1omlRbE7PiJ")] diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs deleted file mode 100644 index 02c04fc..0000000 --- a/src/codegen/mod.rs +++ /dev/null @@ -1,35 +0,0 @@ -#![cfg(test)] - -use std::{collections::BTreeMap, fs::File, io::BufReader}; - -use serde::{Serialize, Deserialize}; - -use crate::model::Language; -mod collect_playlist_dates; -mod gen_dictionary; -mod gen_locales; - -const DICT_PATH: &str = "testfiles/date/dictionary.json"; - -type Dictionary = BTreeMap; - -#[derive(Debug, Default, Serialize, Deserialize)] -#[serde(default)] -struct DictEntry { - equivalent: Vec, - by_char: bool, - timeago_tokens: BTreeMap, - date_order: String, - months: BTreeMap, - timeago_nd_tokens: BTreeMap, -} - -fn read_dict() -> Dictionary { - let json_file = File::open(DICT_PATH).unwrap(); - serde_json::from_reader(BufReader::new(json_file)).unwrap() -} - -fn write_dict(dict: &Dictionary) { - let json_file = File::create(DICT_PATH).unwrap(); - serde_json::to_writer_pretty(json_file, dict).unwrap(); -} diff --git a/src/lib.rs b/src/lib.rs index 436f4e1..e4cd2a5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,18 +4,14 @@ #[macro_use] mod macros; -// #[cfg(test)] -// mod codegen; - mod deobfuscate; mod dictionary; mod serializer; -mod timeago; mod util; -// pub mod client; pub mod cache; pub mod client; pub mod download; pub mod model; pub mod report; +pub mod timeago; diff --git a/src/report.rs b/src/report.rs index 7406f6a..e4726f6 100644 --- a/src/report.rs +++ b/src/report.rs @@ -135,27 +135,6 @@ impl Reporter for YamlFileReporter { } } -#[cfg(test)] -pub struct TestFileReporter { - path: PathBuf, -} - -#[cfg(test)] -impl TestFileReporter { - 
pub fn new>(path: P) -> Self { - Self { - path: path.as_ref().to_path_buf(), - } - } -} - -#[cfg(test)] -impl Reporter for TestFileReporter { - fn report(&self, report: &Report) { - std::fs::write(&self.path, &report.http_request.resp_body).unwrap(); - } -} - fn get_report_path(root: &Path, report: &Report) -> Result { if !root.is_dir() { std::fs::create_dir_all(root)?; diff --git a/src/timeago.rs b/src/timeago.rs index 8d628da..ae6d997 100644 --- a/src/timeago.rs +++ b/src/timeago.rs @@ -78,7 +78,7 @@ impl From for DateTime { } } -pub fn filter_str(string: &str) -> String { +fn filter_str(string: &str) -> String { string .to_lowercase() .chars()