move codegen to separate crate

This commit is contained in:
ThetaDev 2022-09-16 01:37:02 +02:00
parent 8548bc81e9
commit d6cfc7e914
15 changed files with 423 additions and 243 deletions

View file

@ -0,0 +1,310 @@
use std::{
collections::{BTreeMap, HashMap},
fs::File,
hash::Hash,
io::BufReader,
path::Path,
};
use futures::{stream, StreamExt};
use rustypipe::{
client::RustyPipe,
model::{locale::LANGUAGES, Language},
timeago::{self, TimeAgo},
};
use serde::{Deserialize, Serialize};
use crate::util;
/// Collected sample texts: for every language, the 'last updated' text
/// returned for each [`DateCase`]. This is the layout of `playlist_samples.json`.
type CollectedDates = BTreeMap<Language, BTreeMap<DateCase, String>>;
/// Identifies one sample playlist by the kind of update date it is expected to show.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
enum DateCase {
/// Playlist updated today
Today,
/// Playlist updated yesterday
Yesterday,
/// Playlist updated a few days ago (shown as 'n days ago')
Ago,
// One playlist per month, shown with an absolute date
Jan,
Feb,
Mar,
Apr,
May,
Jun,
Jul,
Aug,
Sep,
Oct,
Nov,
Dec,
}
/// Collect 'Playlist updated' dates in every supported language
/// and write them to `testfiles/date/playlist_samples.json`.
///
/// YouTube's API outputs the update date of playlists only in a
/// textual format (e.g. *Last updated on Jan 3, 2020*), which varies
/// by language.
///
/// For recently updated playlists YouTube shows 'today', 'yesterday'
/// and 'x<=7 days ago' instead of the literal date.
///
/// To parse these dates correctly we need to collect a sample set
/// in every language.
///
/// This set includes
/// - one playlist updated today
/// - one playlist updated yesterday
/// - one playlist updated 2-7 days ago
/// - one playlist from every month. Note that there should not
///   be any dates which include the same number twice (e.g. 01.01.2020).
///
/// Because the relative dates change with time, the first three playlists
/// should be checked and eventually changed before running the program.
///
/// `project_root` is the directory that contains `testfiles/`; `concurrency`
/// is the maximum number of languages that are queried at the same time.
///
/// # Panics
///
/// Panics if a playlist cannot be fetched, if a playlist has no update text
/// or if the output file cannot be written. The panic message names the
/// language and playlist that failed.
pub async fn collect_dates(project_root: &Path, concurrency: usize) {
    let json_path = project_root.join("testfiles/date/playlist_samples.json");

    // These are the sample playlists
    let cases = [
        (
            DateCase::Today,
            "RDCLAK5uy_kj3rhiar1LINmyDcuFnXihEO0K1NQa2jI",
        ),
        (DateCase::Yesterday, "PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am"),
        (DateCase::Ago, "PLmB6td997u3kUOrfFwkULZ910ho44oQSy"),
        (DateCase::Jan, "PL1J-6JOckZtFjcni6Xj1pLYglJp6JCpKD"),
        (DateCase::Feb, "PL1J-6JOckZtETrbzwZE7mRIIK6BzWNLAs"),
        (DateCase::Mar, "PL1J-6JOckZtG3AVdvBXhMO64mB2k3BtKi"),
        (DateCase::Apr, "PL1J-6JOckZtE_rUpK24S6X5hOE4eQoprN"),
        (DateCase::May, "PL1J-6JOckZtG1ThBxoSLFL-Jg4sa2iX_a"),
        (DateCase::Jun, "PL1J-6JOckZtF_wSzkXBl91pit9d6Fh0QF"),
        (DateCase::Jul, "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"),
        (DateCase::Aug, "PL1J-6JOckZtFFQeWx-ZC0ubpJCEWmGWRx"),
        (DateCase::Sep, "PL1J-6JOckZtHVs0JhBW_qfsW-dtXuM0mQ"),
        (DateCase::Oct, "PL1J-6JOckZtE4g-XgZkL_N0kkoKui5Eys"),
        (DateCase::Nov, "PL1J-6JOckZtEzjMUEyPyPpG836pjeIapw"),
        (DateCase::Dec, "PL1J-6JOckZtHo91uApeb10Qlf2XhkfM-9"),
    ];

    let rp = RustyPipe::default();
    let collected_dates = stream::iter(LANGUAGES)
        .map(|lang| {
            let rp = rp.clone();
            async move {
                let mut map: BTreeMap<DateCase, String> = BTreeMap::new();
                for (case, pl_id) in cases {
                    // Name the failing language/playlist: with every language times
                    // every sample playlist, a bare unwrap is very hard to debug.
                    let playlist = rp
                        .query()
                        .lang(lang)
                        .get_playlist(pl_id)
                        .await
                        .unwrap_or_else(|e| {
                            panic!(
                                "could not fetch playlist {} (lang: {}): {:?}",
                                pl_id, lang, e
                            )
                        });
                    let update_txt = playlist.last_update_txt.unwrap_or_else(|| {
                        panic!("playlist {} (lang: {}) has no update text", pl_id, lang)
                    });
                    map.insert(case, update_txt);
                }
                (lang, map)
            }
        })
        .buffer_unordered(concurrency)
        .collect::<BTreeMap<_, _>>()
        .await;

    // Buffer the output; the serializer issues many small writes.
    let file = File::create(json_path).unwrap();
    let mut writer = std::io::BufWriter::new(file);
    serde_json::to_writer_pretty(&mut writer, &collected_dates).unwrap();
    // Flush explicitly: errors during the implicit flush on drop would be lost.
    std::io::Write::flush(&mut writer).unwrap();
}
/// Attempt to parse the dates collected by `collect-playlist-dates`
/// and write the results to `dictionary.json`.
///
/// For every language in the dictionary this replaces the month names,
/// the order of the date components and, unless the language is excluded
/// below, the ND tokens.
///
/// The ND (no digit) tokens (today, yesterday) of some languages cannot be
/// parsed automatically and require manual work.
///
/// # Panics
///
/// Panics if the sample or dictionary file cannot be read or written, if a
/// language has no collected samples, or if one of the consistency checks
/// on the samples fails.
pub fn write_samples_to_dict(project_root: &Path) {
let mut json_path = project_root.to_path_buf();
json_path.push("testfiles/date/playlist_samples.json");
let json_file = File::open(json_path).unwrap();
let collected_dates: CollectedDates =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let mut dict = util::read_dict(project_root);
// Copy the keys first so that `dict` can be modified inside the loop
let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();
let months = [
DateCase::Jan,
DateCase::Feb,
DateCase::Mar,
DateCase::Apr,
DateCase::May,
DateCase::Jun,
DateCase::Jul,
DateCase::Aug,
DateCase::Sep,
DateCase::Oct,
DateCase::Nov,
DateCase::Dec,
];
// Expected (year, month, day) of the month sample playlists,
// in the same order as `months`
let dates: [(u32, u32, u32); 12] = [
(2020, 1, 3),
(2016, 2, 7),
(2015, 3, 9),
(2017, 4, 2),
(2014, 5, 22),
(2014, 6, 28),
(2014, 7, 2),
(2015, 8, 23),
(2018, 9, 16),
(2014, 10, 31),
(2016, 11, 3),
(2021, 12, 24),
];
for lang in langs {
// Samples of this language, followed by the samples of all languages
// listed as equivalent to it
let mut datestr_tables = vec![collected_dates.get(&lang).unwrap()];
dict.get(&lang)
.unwrap()
.equivalent
.iter()
.for_each(|l| datestr_tables.push(collected_dates.get(l).unwrap()));
let dict_entry = dict.entry(lang).or_default();
// Order of the date components as a string of Y/M/D letters
let mut num_order = "".to_owned();
let collect_nd_tokens = !matches!(
lang,
// ND tokens of these languages must be edited manually
Language::Ja
| Language::ZhCn
| Language::ZhHk
| Language::ZhTw
| Language::Ko
| Language::Gu
| Language::Pa
| Language::Ur
| Language::Uz
| Language::Te
| Language::PtPt
// Singhalese YT translation has an error (today == tomorrow)
| Language::Si
);
// Discard the previous results; they are rebuilt from the samples below
dict_entry.months = BTreeMap::new();
if collect_nd_tokens {
dict_entry.timeago_nd_tokens = BTreeMap::new();
}
for datestr_table in &datestr_tables {
// Word -> month number (1-12), or 0 if the word was seen more than once
let mut month_words: HashMap<String, usize> = HashMap::new();
// Word -> 1 (today) / 2 (yesterday), or 0 if the word was seen more than once
let mut td_words: HashMap<String, i8> = HashMap::new();
// Today/Yesterday
{
// The first occurrence of a word stores `n`, every further
// occurrence resets it to 0
let mut parse = |string: &str, n: i8| {
util::filter_datestr(string)
.split_whitespace()
.for_each(|word| {
td_words
.entry(word.to_owned())
.and_modify(|e| *e = 0)
.or_insert(n);
});
};
parse(datestr_table.get(&DateCase::Today).unwrap(), 1);
parse(datestr_table.get(&DateCase::Yesterday).unwrap(), 2);
// The words of these two strings are added with 0, so words shared
// with the today/yesterday strings are discarded
parse(datestr_table.get(&DateCase::Ago).unwrap(), 0);
parse(datestr_table.get(&DateCase::Jan).unwrap(), 0);
}
// n days ago
{
// Consistency check: the timeago parser has to read this sample as 3 days
let datestr = datestr_table.get(&DateCase::Ago).unwrap();
let tago = timeago::parse_timeago(lang, datestr);
assert_eq!(
tago,
Some(TimeAgo {
n: 3,
unit: timeago::TimeUnit::Day
}),
"lang: {}, txt: {}",
lang,
datestr
);
}
// Absolute dates (Jan 3, 2020)
months.iter().enumerate().for_each(|(n, m)| {
let datestr = datestr_table.get(m).unwrap();
// Get order of numbers
let nums = util::parse_numeric_vec::<u32>(datestr);
let date = dates[n];
// Map every number of the string to the date component it equals
let this_num_order = nums
.iter()
.map(|n| {
if n == &date.0 {
"Y"
} else if n == &date.1 {
"M"
} else if n == &date.2 {
"D"
} else {
panic!("invalid number {} in {}", n, datestr);
}
})
.collect::<String>();
// Every sample of a language has to use the same order
if num_order.is_empty() {
num_order = this_num_order;
} else {
assert_eq!(this_num_order, num_order, "lang: {}", lang);
}
// Insert words into the map
util::filter_datestr(datestr)
.split_whitespace()
.for_each(|word| {
month_words
.entry(word.to_owned())
.and_modify(|e| *e = 0)
.or_insert(n + 1);
});
});
// Keep only the words that occurred exactly once
month_words.iter().for_each(|(word, m)| {
if *m != 0 {
dict_entry.months.insert(word.to_owned(), *m as u8);
};
});
if collect_nd_tokens {
td_words.iter().for_each(|(word, n)| {
match n {
// Today
1 => {
dict_entry
.timeago_nd_tokens
.insert(word.to_owned(), "0D".to_owned());
}
// Yesterday
2 => {
dict_entry
.timeago_nd_tokens
.insert(word.to_owned(), "1D".to_owned());
}
_ => {}
};
});
// Without equivalent languages exactly two ND tokens are expected
if datestr_tables.len() == 1 {
assert_eq!(
dict_entry.timeago_nd_tokens.len(),
2,
"lang: {}, nd_tokens: {:?}",
lang,
&dict_entry.timeago_nd_tokens
);
}
}
}
dict_entry.date_order = num_order;
}
util::write_dict(project_root, &dict);
}

View file

@ -0,0 +1,132 @@
use std::{
fs::File,
path::{Path, PathBuf},
};
use rustypipe::{
cache::FileStorage,
client::{ClientType, RustyPipe},
report::{Report, Reporter},
};
/// Client types for which a player response is downloaded as a test file.
const CLIENT_TYPES: [ClientType; 5] = [
ClientType::Desktop,
ClientType::DesktopMusic,
ClientType::TvHtml5Embed,
ClientType::Android,
ClientType::Ios,
];
/// Store pretty-printed response json
///
/// Every report is written to the same file, so one reporter is created
/// per test file.
pub struct TestFileReporter {
/// File the response body is written to
path: PathBuf,
}
impl TestFileReporter {
pub fn new<P: AsRef<Path>>(path: P) -> Self {
Self {
path: path.as_ref().to_path_buf(),
}
}
}
impl Reporter for TestFileReporter {
    /// Parse the response body of the reported request as JSON and write it
    /// pretty-printed to the reporter's file, replacing any existing content.
    ///
    /// # Panics
    ///
    /// Panics if the response body is not valid JSON or if the file cannot
    /// be written.
    fn report(&self, report: &Report) {
        let data =
            serde_json::from_str::<serde_json::Value>(&report.http_request.resp_body).unwrap();

        // Response bodies can be large and the serializer issues many small
        // writes, so the file is written through a buffer.
        let file = File::create(&self.path).unwrap();
        let mut writer = std::io::BufWriter::new(file);
        serde_json::to_writer_pretty(&mut writer, &data).unwrap();
        // Flush explicitly: errors during the implicit flush on drop would be lost.
        std::io::Write::flush(&mut writer).unwrap();

        println!("Downloaded {}", self.path.display());
    }
}
/// Build a RustyPipe client that uses the default file storage and
/// reports responses to a [`TestFileReporter`] writing to `json_path`.
fn rp_testfile(json_path: &Path) -> RustyPipe {
    RustyPipe::new(
        Some(Box::new(FileStorage::default())),
        Some(Box::new(TestFileReporter::new(json_path))),
        None,
    )
}
/// Download all missing test files into the `testfiles` directory below
/// `project_root`. The three download groups are run concurrently.
pub async fn download_testfiles(project_root: &Path) {
    let testfiles = project_root.join("testfiles");
    let dir = testfiles.as_path();

    tokio::join!(player(dir), player_model(dir), playlist(dir));
}
/// Download the player response of the sample video for every client type
/// in `CLIENT_TYPES` to `player/<client type>_video.json`.
/// Files that already exist are left untouched.
async fn player(testfiles: &Path) {
    let video_id = "pPvd8UxmSbQ";
    let player_dir = testfiles.join("player");

    for client_type in CLIENT_TYPES {
        let file_name = format!("{:?}_video.json", client_type).to_lowercase();
        let json_path = player_dir.join(file_name);

        if !json_path.exists() {
            // The reporter attached to this client writes the response to `json_path`
            let rp = rp_testfile(&json_path);
            rp.query()
                .report(true)
                .strict(true)
                .get_player(video_id, client_type)
                .await
                .unwrap();
        }
    }
}
/// Fetch the player data of the sample videos with the desktop client and
/// store the parsed result as pretty-printed JSON in `player_model/<name>.json`.
/// Files that already exist are left untouched.
///
/// # Panics
///
/// Panics if a request fails or if a file cannot be written.
async fn player_model(testfiles: &Path) {
    let rp = RustyPipe::default();

    for (name, id) in [("multilanguage", "tVWWp1PqDus"), ("hdr", "LXb3EKWsInQ")] {
        let mut json_path = testfiles.to_path_buf();
        json_path.push("player_model");
        json_path.push(format!("{}.json", name).to_lowercase());
        if json_path.exists() {
            continue;
        }

        let player_data = rp
            .query()
            .strict(true)
            .get_player(id, ClientType::Desktop)
            .await
            .unwrap();

        // Buffer the output; the serializer issues many small writes.
        let file = File::create(&json_path).unwrap();
        let mut writer = std::io::BufWriter::new(file);
        serde_json::to_writer_pretty(&mut writer, &player_data).unwrap();
        // Flush explicitly: errors during the implicit flush on drop would be lost.
        std::io::Write::flush(&mut writer).unwrap();

        println!("Downloaded {}", json_path.display());
    }
}
/// Download the responses of the sample playlists to
/// `playlist/playlist_<name>.json`. Files that already exist are left untouched.
async fn playlist(testfiles: &Path) {
    let samples = [
        ("short", "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"),
        ("long", "PL5dDx681T4bR7ZF1IuWzOv1omlRbE7PiJ"),
        ("nomusic", "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"),
    ];
    let playlist_dir = testfiles.join("playlist");

    for (name, id) in samples {
        let json_path = playlist_dir.join(format!("playlist_{}.json", name));

        if !json_path.exists() {
            // The reporter attached to this client writes the response to `json_path`
            let rp = rp_testfile(&json_path);
            rp.query()
                .report(true)
                .strict(true)
                .get_playlist(id)
                .await
                .unwrap();
        }
    }
}

View file

@ -0,0 +1,118 @@
use std::fmt::Write;
use std::path::Path;
use fancy_regex::Regex;
use once_cell::sync::Lazy;
use rustypipe::timeago::TimeUnit;
use crate::util;
/// Path of the generated dictionary source file, relative to the project root.
const TARGET_PATH: &str = "src/dictionary.rs";
/// Parse a time unit string from the dictionary (e.g. `D`, `0D`, `2W`).
///
/// The string consists of an optional number followed by an optional unit
/// letter (`s`, `m`, `h`, `D`, `W`, `M`, `Y`). A missing number counts as 1,
/// a missing unit is returned as `None`.
///
/// # Panics
///
/// Panics if the string does not have this format, if the unit letter is
/// unknown or if the number does not fit into a `u8`.
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
    static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());

    let cap = match TU_PATTERN.captures(tu).unwrap() {
        Some(cap) => cap,
        None => panic!("invalid time unit: {}", tu),
    };

    // Only an absent number defaults to 1. A number that cannot be parsed
    // (e.g. because it is too large) is an error in the dictionary and must
    // not be turned into 1 silently.
    let n: u8 = match cap.get(1).unwrap().as_str() {
        "" => 1,
        digits => digits
            .parse::<u8>()
            .unwrap_or_else(|_| panic!("invalid number in time unit: {}", tu)),
    };

    let unit = match cap.get(2).unwrap().as_str() {
        "s" => Some(TimeUnit::Second),
        "m" => Some(TimeUnit::Minute),
        "h" => Some(TimeUnit::Hour),
        "D" => Some(TimeUnit::Day),
        "W" => Some(TimeUnit::Week),
        "M" => Some(TimeUnit::Month),
        "Y" => Some(TimeUnit::Year),
        "" => None,
        _ => panic!("invalid time unit: {}", tu),
    };

    (n, unit)
}
/// Generate the Rust source of the dictionary from the dictionary JSON file
/// and write it to `TARGET_PATH` below `project_root`.
///
/// The generated file contains the `Entry` struct and a function `entry`
/// which returns the entry of a language. Languages listed as equivalent
/// share one match arm.
pub fn generate_dictionary(project_root: &Path) {
    let dict = util::read_dict(project_root);

    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
use crate::{
model::Language,
timeago::{DateCmp, TaToken, TimeUnit},
};
pub struct Entry {
pub by_char: bool,
pub timeago_tokens: phf::Map<&'static str, TaToken>,
pub date_order: &'static [DateCmp],
pub months: phf::Map<&'static str, u8>,
pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
}
"#;

    let mut code_timeago_tokens = String::from(
        r#"#[rustfmt::skip]
pub fn entry(lang: Language) -> Entry {
match lang {
"#,
    );

    for (lang, entry) in dict.iter() {
        // Match selector
        let mut selector = format!("Language::{:?}", lang);
        for eq in entry.equivalent.iter() {
            let _ = write!(selector, " | Language::{:?}", eq);
        }

        // Timeago tokens
        let mut ta_tokens = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in entry.timeago_tokens.iter() {
            let token = match parse_tu(tu_str) {
                (n, Some(unit)) => {
                    format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit)
                }
                (n, None) => format!("TaToken {{ n: {}, unit: None }}", n),
            };
            ta_tokens.entry(txt, &token);
        }

        // Months
        let mut months = phf_codegen::Map::<&str>::new();
        for (txt, n_mon) in entry.months.iter() {
            months.entry(txt, &n_mon.to_string());
        }

        // Timeago(ND) tokens
        let mut ta_nd_tokens = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in entry.timeago_nd_tokens.iter() {
            let token = match parse_tu(tu_str) {
                (n, Some(unit)) => {
                    format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit)
                }
                (n, None) => format!("TaToken {{ n: {}, unit: None }}", n),
            };
            ta_nd_tokens.entry(txt, &token);
        }

        // Date order
        let mut date_order = String::from("&[");
        for c in entry.date_order.chars() {
            let _ = write!(date_order, "DateCmp::{}, ", c);
        }
        let mut date_order = date_order.trim_end_matches([' ', ',']).to_owned();
        date_order.push(']');

        // Re-indent the generated maps so they line up inside the match arm
        let code_ta_tokens = ta_tokens.build().to_string().replace('\n', "\n ");
        let code_ta_nd_tokens = ta_nd_tokens.build().to_string().replace('\n', "\n ");
        let code_months = months.build().to_string().replace('\n', "\n ");

        let _ = write!(
            code_timeago_tokens,
            "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n }},\n ",
            selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens
        );
    }

    // Close the match and the function
    let mut code_timeago_tokens = code_timeago_tokens.trim_end().to_owned();
    code_timeago_tokens.push_str("\n }\n}\n");

    let code = format!("{}\n{}", code_head, code_timeago_tokens);

    let target_path = project_root.join(TARGET_PATH);
    std::fs::write(target_path, code).unwrap();
}

354
codegen/src/gen_locales.rs Normal file
View file

@ -0,0 +1,354 @@
use std::collections::BTreeMap;
use std::fmt::Write;
use std::path::Path;
use reqwest::header;
use reqwest::Client;
use serde::Deserialize;
use serde_with::serde_as;
use serde_with::VecSkipError;
/// Response of the `account_menu` request sent by `get_locales`.
///
/// Only the fields needed to reach the language and country lists are modelled.
#[serde_as]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LanguageMenu {
/// Actions of the response; elements which cannot be deserialized are skipped
#[serde_as(as = "VecSkipError<_>")]
actions: Vec<ActionWrap>,
}
/// Wrapper object around an [`OpenPopupAction`]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ActionWrap {
open_popup_action: OpenPopupAction,
}
/// Action that carries the popup holding the account menu
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct OpenPopupAction {
popup: Popup,
}
/// Popup whose menu sections contain the language and country entries
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Popup {
multi_page_menu_renderer: MultiPageMenuRenderer<MenuSectionRenderer>,
}
/// Menu consisting of a list of sections with content type `T`
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MultiPageMenuRenderer<T> {
sections: Vec<MenuSectionRendererWrap<T>>,
}
/// Wrapper object around the content of a menu section
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MenuSectionRendererWrap<T> {
multi_page_menu_section_renderer: T,
}
/// Section of the account menu
#[serde_as]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MenuSectionRenderer {
/// Menu entries; elements which cannot be deserialized are skipped
#[serde_as(as = "VecSkipError<_>")]
items: Vec<CompactLinkRendererWrap>,
}
/// Wrapper object around a [`CompactLinkRenderer`]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct CompactLinkRendererWrap {
compact_link_renderer: CompactLinkRenderer,
}
/// Entry of the account menu which opens a submenu
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct CompactLinkRenderer {
/// Icon of the entry; `get_locales` uses its type to find the language and country entries
icon: Icon,
service_endpoint: ServiceEndpoint<MenuAction>,
}
/// Icon of a menu entry
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Icon {
/// Icon name, e.g. `TRANSLATE` or `LANGUAGE`
pub icon_type: String,
}
/// Wrapper object around a [`SignalServiceEndpoint`]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ServiceEndpoint<T> {
signal_service_endpoint: SignalServiceEndpoint<T>,
}
/// Endpoint holding a list of actions of type `T`
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct SignalServiceEndpoint<T> {
actions: Vec<T>,
}
/// Wrapper object around a [`MultiPageMenuAction`]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MenuAction {
get_multi_page_menu_action: MultiPageMenuAction,
}
/// Action carrying the submenu of a menu entry
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MultiPageMenuAction {
menu: Menu,
}
/// Submenu listing the available languages or countries
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Menu {
multi_page_menu_renderer: MultiPageMenuRenderer<ItemSectionRenderer>,
}
/// Section of a language/country submenu
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ItemSectionRenderer {
items: Vec<LanguageItemWrap>,
}
/// Wrapper object around a [`LanguageItem`]
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LanguageItemWrap {
compact_link_renderer: LanguageItem,
}
/// Single language or country of a submenu
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LanguageItem {
/// Displayed name
title: Text,
/// Endpoint whose first action carries the language/country code
service_endpoint: ServiceEndpoint<LanguageCountryAction>,
}
/// Action that selects a language or a country.
///
/// The alias allows the same struct to be used for both submenus.
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LanguageCountryAction {
#[serde(alias = "selectCountryCommand")]
select_language_command: LanguageCountryCommand,
}
/// Command holding the code of the selected language or country
#[derive(Clone, Debug, Deserialize)]
struct LanguageCountryCommand {
/// Code read from the `hl` field or, via the alias, from the `gl` field
#[serde(alias = "gl")]
hl: String,
}
/// Text object holding a plain string
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Text {
simple_text: String,
}
/// Fetch the languages and countries offered by YouTube and generate
/// `src/model/locale.rs` below `project_root`.
///
/// The generated file contains the `Language` and `Country` enums, arrays
/// of all their variants, a `name` method for both and `Display`/`FromStr`
/// implementations.
pub async fn generate_locales(project_root: &Path) {
    /// Make the first character upper case and the rest lower case.
    fn capitalize(s: &str) -> String {
        format!("{}{}", s[0..1].to_uppercase(), s[1..].to_lowercase())
    }

    let (languages, countries) = get_locales().await;

    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
use std::{fmt::Display, str::FromStr};
use serde::{Deserialize, Serialize};
"#;

    let code_foot = r#"impl Display for Language {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(
&serde_json::to_string(self).map_or("".to_owned(), |s| s[1..s.len() - 1].to_owned()),
)
}
}
impl Display for Country {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(
&serde_json::to_string(self).map_or("".to_owned(), |s| s[1..s.len() - 1].to_owned()),
)
}
}
impl FromStr for Language {
type Err = serde_json::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
serde_json::from_str(&format!("\"{}\"", s))
}
}
impl FromStr for Country {
type Err = serde_json::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
serde_json::from_str(&format!("\"{}\"", s))
}
}
"#;

    let mut code_langs = String::from(
        r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Language {
"#,
    );
    let mut code_countries = String::from(
        r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "UPPERCASE")]
pub enum Country {
"#,
    );

    let mut code_lang_array = format!("pub const LANGUAGES: [Language; {}] = [\n", languages.len());
    let mut code_country_array =
        format!("pub const COUNTRIES: [Country; {}] = [\n", countries.len());

    let mut code_lang_names = String::from(
        r#"impl Language {
pub fn name(&self) -> &str {
match self {
"#,
    );
    let mut code_country_names = String::from(
        r#"impl Country {
pub fn name(&self) -> &str {
match self {
"#,
    );

    for (code, name) in languages.iter() {
        // `zh-CN` becomes `ZhCn`
        let enum_name: String = code.split('-').map(capitalize).collect();

        // Language enum
        let _ = write!(code_langs, " /// {}\n ", name);
        if code.contains('-') {
            // Codes with a region do not match the lowercase variant name
            let _ = write!(code_langs, "#[serde(rename = \"{}\")]\n ", code);
        }
        code_langs.push_str(&enum_name);
        code_langs.push_str(",\n");

        // Language array
        let _ = writeln!(code_lang_array, " Language::{},", enum_name);

        // Language names
        let _ = writeln!(
            code_lang_names,
            " Language::{} => \"{}\",",
            enum_name, name
        );
    }
    code_langs.push_str("}\n");

    for (code, name) in countries.iter() {
        let enum_name = capitalize(code);

        // Country enum
        let _ = writeln!(code_countries, " /// {}", name);
        let _ = writeln!(code_countries, " {},", enum_name);

        // Country array
        let _ = writeln!(code_country_array, " Country::{},", enum_name);

        // Country names
        let _ = writeln!(
            code_country_names,
            " Country::{} => \"{}\",",
            enum_name, name
        );
    }
    code_countries.push_str("}\n");

    code_lang_array.push_str("];\n");
    code_country_array.push_str("];\n");
    code_lang_names.push_str(" }\n }\n}\n");
    code_country_names.push_str(" }\n }\n}\n");

    // The sections are separated by a single newline
    let code = [
        code_head,
        code_langs.as_str(),
        code_countries.as_str(),
        code_lang_array.as_str(),
        code_country_array.as_str(),
        code_lang_names.as_str(),
        code_country_names.as_str(),
        code_foot,
    ]
    .join("\n");

    let target_path = project_root.join("src/model/locale.rs");
    std::fs::write(target_path, code).unwrap();
}
/// Request the account menu from YouTube and extract the available
/// languages and countries from it.
///
/// Returns two maps (languages, countries) from the language/country code
/// to the displayed name.
///
/// Panics if the request fails or the response does not have the expected structure.
async fn get_locales() -> (BTreeMap<String, String>, BTreeMap<String, String>) {
    let request = Client::new()
        .post("https://www.youtube.com/youtubei/v1/account/account_menu?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8")
        .header(header::CONTENT_TYPE, "application/json")
        .body(
            r##"{"context":{"client":{"clientName":"WEB","clientVersion":"2.20220914.06.00","platform":"DESKTOP","originalUrl":"https://www.youtube.com/","hl":"en","gl":"US"},"request":{"internalExperimentFlags":[],"useSsl":true},"user":{"lockedSafetyMode":false}}}"##
        );
    let resp = request.send().await.unwrap().error_for_status().unwrap();
    let language_menu = resp.json::<LanguageMenu>().await.unwrap();

    // The wanted section is the first one with at least two entries
    let menu_items = &language_menu.actions[0]
        .open_popup_action
        .popup
        .multi_page_menu_renderer
        .sections
        .iter()
        .find(|section| section.multi_page_menu_section_renderer.items.len() >= 2)
        .unwrap()
        .multi_page_menu_section_renderer
        .items;

    // The language and country entries are identified by their icon
    let lang_section = menu_items
        .iter()
        .find(|item| item.compact_link_renderer.icon.icon_type == "TRANSLATE")
        .unwrap();
    let country_section = menu_items
        .iter()
        .find(|item| item.compact_link_renderer.icon.icon_type == "LANGUAGE")
        .unwrap();

    (
        map_language_section(lang_section),
        map_language_section(country_section),
    )
}
/// Collect the entries of a language or country submenu into a map from
/// the language/country code to the displayed name.
///
/// Only the first action and the first submenu section are read; the
/// function panics if one of them is missing.
fn map_language_section(section: &CompactLinkRendererWrap) -> BTreeMap<String, String> {
    let submenu = &section
        .compact_link_renderer
        .service_endpoint
        .signal_service_endpoint
        .actions[0]
        .get_multi_page_menu_action
        .menu
        .multi_page_menu_renderer;
    let items = &submenu.sections[0].multi_page_menu_section_renderer.items;

    let mut locales = BTreeMap::new();
    for item in items {
        let link = &item.compact_link_renderer;
        let code = link.service_endpoint.signal_service_endpoint.actions[0]
            .select_language_command
            .hl
            .clone();
        locales.insert(code, link.title.simple_text.clone());
    }
    locales
}

50
codegen/src/main.rs Normal file
View file

@ -0,0 +1,50 @@
mod collect_playlist_dates;
mod download_testfiles;
mod gen_dictionary;
mod gen_locales;
mod util;
use std::path::PathBuf;
use clap::{Parser, Subcommand};
// Command line arguments of the code generator.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macro turns
// `///` doc comments into help text, which would change the program output.
#[derive(Parser)]
struct Cli {
// Task to run
#[clap(subcommand)]
command: Commands,
// Root directory of the project (`-d`), passed to every task; defaults to `..`
#[clap(short = 'd', default_value = "..")]
project_root: PathBuf,
// Concurrency (`-c`) passed to the playlist date collection; defaults to 8
#[clap(short, default_value = "8")]
concurrency: usize,
}
// Tasks that can be selected on the command line.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macro turns
// `///` doc comments into help text, which would change the program output.
#[derive(Subcommand)]
enum Commands {
// Runs `collect_playlist_dates::collect_dates`
CollectPlaylistDates,
// Runs `collect_playlist_dates::write_samples_to_dict`
WritePlaylistDates,
// Runs `gen_locales::generate_locales`
GenLocales,
// Runs `gen_dictionary::generate_dictionary`
GenDict,
// Runs `download_testfiles::download_testfiles`
DownloadTestfiles,
}
#[tokio::main]
async fn main() {
env_logger::init();
let cli = Cli::parse();
match cli.command {
Commands::CollectPlaylistDates => {
collect_playlist_dates::collect_dates(&cli.project_root, cli.concurrency).await;
}
Commands::WritePlaylistDates => {
collect_playlist_dates::write_samples_to_dict(&cli.project_root);
}
Commands::GenLocales => {
gen_locales::generate_locales(&cli.project_root).await;
}
Commands::GenDict => gen_dictionary::generate_dictionary(&cli.project_root),
Commands::DownloadTestfiles => {
download_testfiles::download_testfiles(&cli.project_root).await
}
};
}

72
codegen/src/util.rs Normal file
View file

@ -0,0 +1,72 @@
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path, str::FromStr};
use rustypipe::model::Language;
use serde::{Deserialize, Serialize};
/// Path of the dictionary JSON file, relative to the project root.
const DICT_PATH: &str = "testfiles/date/dictionary.json";
/// The dictionary: one [`DictEntry`] per language, ordered by language.
type Dictionary = BTreeMap<Language, DictEntry>;
/// Dictionary entry of one language.
///
/// Fields missing in the JSON file are filled with their default value.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct DictEntry {
/// Other languages which use this entry as well
pub equivalent: Vec<Language>,
/// Copied unchanged into the generated code.
/// NOTE(review): presumably selects parsing by character instead of by word - confirm
pub by_char: bool,
/// Word -> time unit string: optional number followed by an optional unit
/// letter (`s`, `m`, `h`, `D`, `W`, `M`, `Y`)
pub timeago_tokens: BTreeMap<String, String>,
/// Order of the date components as a string of the letters `Y`, `M` and `D`
pub date_order: String,
/// Word -> month number (1-12)
pub months: BTreeMap<String, u8>,
/// Word -> time unit string for texts without a number
/// (`0D` for today, `1D` for yesterday)
pub timeago_nd_tokens: BTreeMap<String, String>,
}
/// Read the dictionary from `DICT_PATH` below `project_root`.
///
/// Panics if the file cannot be opened or parsed.
pub fn read_dict(project_root: &Path) -> Dictionary {
    let json_path = project_root.join(DICT_PATH);
    let reader = BufReader::new(File::open(json_path).unwrap());
    serde_json::from_reader(reader).unwrap()
}
/// Write the dictionary as pretty-printed JSON to `DICT_PATH` below
/// `project_root`, replacing the existing file.
///
/// # Panics
///
/// Panics if the file cannot be created or written.
pub fn write_dict(project_root: &Path, dict: &Dictionary) {
    let json_path = project_root.join(DICT_PATH);
    let json_file = File::create(json_path).unwrap();

    // Buffer the output; the serializer issues many small writes.
    let mut writer = std::io::BufWriter::new(json_file);
    serde_json::to_writer_pretty(&mut writer, dict).unwrap();
    // Flush explicitly: errors during the implicit flush on drop would be lost.
    std::io::Write::flush(&mut writer).unwrap();
}
/// Prepare a date string for splitting into words.
///
/// The string is converted to lower case, ASCII digits and zero width
/// spaces (U+200B) are removed and hyphens are replaced with spaces.
pub fn filter_datestr(string: &str) -> String {
    let mut filtered = String::new();
    for c in string.to_lowercase().chars() {
        match c {
            '\u{200b}' => {}
            '-' => filtered.push(' '),
            _ if c.is_ascii_digit() => {}
            _ => filtered.push(c),
        }
    }
    filtered
}
/// Parse all numbers occurring in a string and return them as a vec.
///
/// A number is a run of consecutive ASCII digits. Numbers which cannot be
/// parsed as `F` (e.g. because they are too large) are left out.
pub fn parse_numeric_vec<F>(string: &str) -> Vec<F>
where
    F: FromStr,
{
    string
        .split(|c: char| !c.is_ascii_digit())
        .filter(|digits| !digits.is_empty())
        .filter_map(|digits| digits.parse::<F>().ok())
        .collect()
}