finish timeago parser, refactor codegen

This commit is contained in:
ThetaDev 2022-09-05 21:43:43 +02:00
parent 500ea77788
commit 513bf1dc9c
12 changed files with 641 additions and 895 deletions

View file

@ -29,12 +29,12 @@ Throttling issue: Y8JFxS1HlDo
4.657 Songs: PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX
186 Songs: PLbZIPy20-1pN7mqjckepWF78ndb6ci_qi
Playlist update dates:
Playlist update dates (05.09.2022):
today: RDCLAK5uy_kj3rhiar1LINmyDcuFnXihEO0K1NQa2jI
yesterday: PL3-sRm8xAzY9sDilvaWjCwCI0TkUzYdOG
2 days ago: PL3qHjxSSl7AER3rxfEr4SiHNr-ihbQyqU
3 days ago: PLHr0jWPfopte182N54r1ra7tkRJC1fmPu
5 days ago: PLF7B92F492FDAE703
yesterday: PL4C44E2875308A280
2 days ago: PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am
5 days ago: PL3-sRm8xAzY9sDilvaWjCwCI0TkUzYdOG
7 days ago: PLHr0jWPfopte182N54r1ra7tkRJC1fmPu
Jan PL1J-6JOckZtHxTA3hN5SK7gBQaFfKzeXr 01.01.2016
Feb PL1J-6JOckZtETrbzwZE7mRIIK6BzWNLAs 07.02.2016

View file

@ -4,9 +4,6 @@ pub mod video;
mod response;
#[cfg(test)]
mod scripts;
use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
@ -190,7 +187,7 @@ impl RustyTube {
}
}
fn get_ytclient(&self, client_type: ClientType) -> Arc<dyn YTClient> {
pub(crate) fn get_ytclient(&self, client_type: ClientType) -> Arc<dyn YTClient> {
match client_type {
ClientType::Desktop => self.desktop_client.clone(),
ClientType::DesktopMusic => self.desktop_music_client.clone(),

View file

@ -1,3 +0,0 @@
#![cfg(test)]
mod language_menu;
mod timeago_testfiles;

View file

@ -1,671 +0,0 @@
#![cfg(test)]
use std::{
collections::{BTreeMap, HashSet},
fs::File,
io::BufReader,
path::Path,
};
use fancy_regex::Regex;
use futures::{stream, StreamExt};
use intl_pluralrules::{PluralCategory, PluralRuleType, PluralRules};
use log::{error, info};
use once_cell::sync::Lazy;
use reqwest::Method;
use serde::{Deserialize, Serialize};
use unic_langid::LanguageIdentifier;
use crate::{
client::{response, ClientType, ContextYT, RustyTube},
model::{Country, Language},
timeago::{self, TimeUnit, TimeagoPattern, LANGUAGES},
};
/// Request body sent to the `browse` endpoint by `get_channel_datestrings`.
///
/// Field names are serialized in camelCase (`context`, `browseId`, `params`).
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct QChannel {
/// Client context sent along with the request.
context: ContextYT,
/// ID of the channel to browse.
browse_id: String,
/// Opaque request parameter string.
/// NOTE(review): presumably selects the channel's videos tab - confirm.
params: String,
}
/// Request the browse page of `channel_id` with the desktop client and return
/// the `published_time_text` of every grid video in the first section of the
/// first tab. Videos without a published time text are skipped.
///
/// # Panics
///
/// Panics if the request fails, the server answers with an error status, the
/// response cannot be deserialized, or the first tab/section/item is missing.
async fn get_channel_datestrings(rp: &RustyTube, channel_id: &str) -> Vec<String> {
    let client = rp.get_ytclient(ClientType::Desktop);
    let request_body = QChannel {
        context: client.get_context(true).await,
        browse_id: channel_id.to_owned(),
        params: "EgZ2aWRlb3PyBgQKAjoA".to_owned(),
    };

    let response = client
        .request_builder(Method::POST, "browse")
        .await
        .json(&request_body)
        .send()
        .await
        .unwrap()
        .error_for_status()
        .unwrap();
    let channel = response.json::<response::Channel>().await.unwrap();

    // Walk down to the video grid step by step.
    let first_tab = &channel.contents.two_column_browse_results_renderer.tabs[0];
    let first_section = &first_tab
        .tab_renderer
        .content
        .section_list_renderer
        .contents[0];
    let grid = &first_section.item_section_renderer.contents[0].grid_renderer;

    let mut datestrings = Vec::new();
    for item in grid.items.iter() {
        // Continuation items carry no date and are ignored.
        if let response::VideoListItem::GridVideoRenderer { video } = item {
            datestrings.extend(video.published_time_text.to_owned());
        }
    }
    datestrings
}
/// Fetch the video response for `video_id` and extract the continuation token
/// used to request its first page of comments.
///
/// * `latest == true`: the token of the second entry (index 1) of the comment
///   sort menu found in the engagement panels.
///   NOTE(review): presumably the "newest first" sort order - confirm.
/// * `latest == false`: the token of the continuation item that is the first
///   entry of the `comment-item-section` section.
///
/// # Panics
///
/// Panics if the video response cannot be fetched or no matching token exists.
async fn get_comment_initial_ctoken(rp: &RustyTube, video_id: &str, latest: bool) -> String {
    let video_response = rp.get_video_response(video_id).await.unwrap();

    if latest {
        return video_response
            .engagement_panels
            .iter()
            .find_map(|panel| {
                let sort_items = &panel
                    .engagement_panel_section_list_renderer
                    .header
                    .engagement_panel_title_header_renderer
                    .menu
                    .sort_filter_sub_menu_renderer
                    .sub_menu_items;
                sort_items
                    .get(1)
                    .map(|item| item.service_endpoint.continuation_command.token.to_owned())
            })
            .unwrap();
    }

    video_response
        .contents
        .two_column_watch_next_results
        .results
        .results
        .contents
        .iter()
        .find_map(|item| {
            if let response::video::VideoResultsItem::ItemSectionRenderer {
                contents,
                section_identifier,
            } = item
            {
                if section_identifier == "comment-item-section" {
                    if let response::video::ItemSection::ContinuationItemRenderer {
                        continuation_endpoint,
                    } = &contents[0]
                    {
                        return Some(continuation_endpoint.continuation_command.token.to_owned());
                    }
                }
            }
            None
        })
        .unwrap()
}
/// Fetch one page of comments for the continuation token `ctoken`.
///
/// Returns the `published_time_text` of every comment thread contained in the
/// last received endpoint, together with the continuation token for the next
/// page (`None` if the page contains no continuation item).
///
/// # Panics
///
/// Panics if the comments response cannot be fetched or contains no endpoints.
async fn get_comment_datestrings(rp: &RustyTube, ctoken: &str) -> (Vec<String>, Option<String>) {
    let comments_response = rp.get_comments_response(ctoken).await.unwrap();
    let mut next_ctoken: Option<String> = None;

    let datestrings = comments_response
        .on_response_received_endpoints
        .iter()
        // Only the last endpoint is inspected.
        .next_back()
        .unwrap()
        .append_continuation_items_action
        .continuation_items
        .iter()
        .filter_map(|itm| match itm {
            response::video::CommentListItem::CommentThreadRenderer { comment, .. } => {
                Some(comment.comment_renderer.published_time_text.to_owned())
            }
            response::video::CommentListItem::ContinuationItemRenderer {
                continuation_endpoint,
            } => {
                // Remember where the next page starts; yields no date string.
                next_ctoken = Some(continuation_endpoint.continuation_command.token.to_owned());
                None
            }
            _ => None,
        })
        .collect::<Vec<_>>();

    (datestrings, next_ctoken)
}
/// Download the video date strings of four fixed channels in every language
/// and store them in `testfiles/date/timeago_samples.json`.
///
/// Only the positions at which the English string occurs for the first time
/// are kept, and the same positions are selected for every other language.
/// Does nothing if the output file already exists.
// #[test_log::test(tokio::test)]
#[allow(dead_code)]
async fn download_timeago_testfiles() {
    let json_path = Path::new("testfiles/date/timeago_samples.json");
    if json_path.exists() {
        return;
    }

    let channel_ids = [
        "UCeY0bbntWzzVIaj2z3QigXg",
        "UCcmpeVbSSQlZRvHfdC-CRwg",
        "UC65afEgL62PGFWXY7n6CUbA",
        "UCEOXxzW2vU0P-0THehuIIeg",
    ];

    // Get strings of all languages
    let mut lang_strings: BTreeMap<Language, Vec<String>> = BTreeMap::new();
    for lang in timeago::LANGUAGES {
        let rp = RustyTube::new_with_ua(lang, Country::Us, None);
        let per_channel: Vec<Vec<String>> = stream::iter(channel_ids)
            .map(|id| get_channel_datestrings(&rp, id))
            .buffered(4)
            .collect()
            .await;
        lang_strings.insert(lang, per_channel.into_iter().flatten().collect());
    }

    // Indices of the first occurrence of every distinct English string.
    let mut seen_en: HashSet<&str> = HashSet::new();
    let mut uniq_ids: HashSet<usize> = HashSet::new();
    for (idx, text) in lang_strings[&Language::En].iter().enumerate() {
        if seen_en.insert(text) {
            uniq_ids.insert(idx);
        }
    }

    // Keep the same indices for every language.
    let mut strings_map = BTreeMap::new();
    for (lang, strings) in lang_strings.iter() {
        let kept = strings
            .iter()
            .enumerate()
            .filter(|(idx, _)| uniq_ids.contains(idx))
            .map(|(_, text)| text)
            .collect::<Vec<_>>();
        strings_map.insert(lang, kept);
    }

    let out_file = File::create(json_path).unwrap();
    serde_json::to_writer_pretty(out_file, &strings_map).unwrap();
}
/// Top-level object of the plural rules JSON file read by `PLURAL_RULES`
/// (`testfiles/date/cldr_pluralrules_cardinals.json`).
#[derive(Debug, Clone, Deserialize)]
struct PluralRulesData {
/// Content of the `supplemental` key.
supplemental: PluralRulesInner,
}
/// Content of the `supplemental` object of the plural rules JSON file.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct PluralRulesInner {
/// Cardinal plural rulesets keyed by language string
/// (JSON key `plurals-type-cardinal`).
plurals_type_cardinal: BTreeMap<String, Ruleset>,
}
/// Plural rules of a single language.
///
/// Each field holds the rule text of one plural category and is `None` when
/// the corresponding `pluralRule-count-*` key is absent for that language.
/// Only the presence of a rule is evaluated by `PLURAL_RULES`.
#[derive(Debug, Clone, Deserialize)]
struct Ruleset {
#[serde(rename = "pluralRule-count-one")]
one: Option<String>,
#[serde(rename = "pluralRule-count-two")]
two: Option<String>,
#[serde(rename = "pluralRule-count-few")]
few: Option<String>,
#[serde(rename = "pluralRule-count-many")]
many: Option<String>,
#[serde(rename = "pluralRule-count-other")]
other: Option<String>,
}
/// Plural category of a number.
///
/// Local counterpart of `intl_pluralrules::PluralCategory` (see the `From`
/// conversion) that additionally derives `Serialize`/`Deserialize` and `Hash`,
/// so it can be stored in the timeago table.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
enum PluralCat {
Zero,
One,
Two,
Few,
Many,
Other,
}
impl From<PluralCategory> for PluralCat {
fn from(value: PluralCategory) -> Self {
match value {
PluralCategory::ZERO => Self::Zero,
PluralCategory::ONE => Self::One,
PluralCategory::TWO => Self::Two,
PluralCategory::FEW => Self::Few,
PluralCategory::MANY => Self::Many,
PluralCategory::OTHER => Self::Other,
}
}
}
/// Plural categories that exist for each language, keyed by the language
/// string used in `testfiles/date/cldr_pluralrules_cardinals.json`.
///
/// Loaded lazily on first access. A category is part of the set if the file
/// contains a rule for it; `PluralCat::Zero` is never inserted.
/// Panics on first access if the file is missing or malformed.
static PLURAL_RULES: Lazy<BTreeMap<String, HashSet<PluralCat>>> = Lazy::new(|| {
    let json_file =
        File::open(Path::new("testfiles/date/cldr_pluralrules_cardinals.json")).unwrap();
    let data: PluralRulesData = serde_json::from_reader(BufReader::new(json_file)).unwrap();

    data.supplemental
        .plurals_type_cardinal
        .iter()
        .map(|(lang, rules)| {
            let categories = [
                (&rules.one, PluralCat::One),
                (&rules.two, PluralCat::Two),
                (&rules.few, PluralCat::Few),
                (&rules.many, PluralCat::Many),
                (&rules.other, PluralCat::Other),
            ]
            .iter()
            .filter(|(rule, _)| rule.is_some())
            .map(|(_, category)| *category)
            .collect::<HashSet<PluralCat>>();
            (lang.to_owned(), categories)
        })
        .collect()
});
/// Collected timeago sample strings, persisted as
/// `testfiles/date/timeago_table.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct TimeagoTable {
/// Collection state per language and time unit.
entries: BTreeMap<Language, BTreeMap<TimeUnit, TimeagoTableEntry>>,
/// Date strings that could not be parsed, grouped by language.
errors: BTreeMap<Language, HashSet<String>>,
}
/// Collection state for one time unit of one language.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct TimeagoTableEntry {
/// Collected date strings mapped to the number that was parsed from them.
cases: BTreeMap<String, u8>,
/// Plural categories for which no date string has been collected yet.
missing_plurals: HashSet<PluralCat>,
}
/// The time units that get an entry per language in a new timeago table,
/// listed from seconds up to years.
const TIME_UNITS: [TimeUnit; 7] = [
TimeUnit::Second,
TimeUnit::Minute,
TimeUnit::Hour,
TimeUnit::Day,
TimeUnit::Week,
TimeUnit::Month,
TimeUnit::Year,
];
/// Build an empty timeago table.
///
/// Every non-redundant language gets one entry per time unit with no collected
/// cases. The missing plural categories are initialised from `PLURAL_RULES`,
/// except for weeks, where only the categories of the numbers 2, 3 and 4 are
/// expected.
///
/// # Panics
///
/// Panics if a language is unknown to `PLURAL_RULES` or `intl_pluralrules`.
fn new_timeago_table() -> TimeagoTable {
    let mut entries = BTreeMap::new();

    for lang in LANGUAGES.iter() {
        // Check if language is redundant
        if matches!(
            lang,
            Language::EnGb
                | Language::EnIn
                | Language::FrCa
                | Language::EsUs
                | Language::Es419
        ) {
            continue;
        }

        let cldr_lang_str = match lang {
            Language::SrLatn => "sr".to_owned(),
            Language::ZhCn | Language::ZhHk | Language::ZhTw => "zh".to_owned(),
            _ => lang.to_string(),
        };

        let mut units = BTreeMap::new();
        for unit in TIME_UNITS.iter() {
            let missing_plurals = if *unit == TimeUnit::Week {
                // Week only has 3 valid values (2-4)
                let l_id = cldr_lang_str.parse::<LanguageIdentifier>().unwrap();
                let pr = PluralRules::create(l_id, PluralRuleType::CARDINAL).unwrap();
                [2_i32, 3, 4]
                    .iter()
                    .map(|n| PluralCat::from(pr.select(*n).unwrap()))
                    .collect::<HashSet<_>>()
            } else {
                PLURAL_RULES.get(&cldr_lang_str).unwrap().clone()
            };

            units.insert(
                unit.to_owned(),
                TimeagoTableEntry {
                    cases: BTreeMap::new(),
                    missing_plurals,
                },
            );
        }
        entries.insert(lang.to_owned(), units);
    }

    TimeagoTable {
        entries,
        errors: BTreeMap::new(),
    }
}
/// Load the timeago table from `testfiles/date/timeago_table.json`, or create
/// a fresh one if that file does not exist.
///
/// Panics if the file exists but cannot be opened or deserialized.
fn read_timeago_table() -> TimeagoTable {
    let json_path = Path::new("testfiles/date/timeago_table.json");
    if !json_path.exists() {
        return new_timeago_table();
    }

    let reader = BufReader::new(File::open(json_path).unwrap());
    serde_json::from_reader(reader).unwrap()
}
/// Write the timeago table as pretty-printed JSON to
/// `testfiles/date/timeago_table.json`, replacing any existing file.
///
/// Panics if the file cannot be created or serialization fails.
fn write_timeago_table(timeago_table: &TimeagoTable) {
    let out_file = File::create(Path::new("testfiles/date/timeago_table.json")).unwrap();
    serde_json::to_writer_pretty(out_file, timeago_table).unwrap();
}
fn insert_timeago_table(
timeago_table: &mut TimeagoTable,
lang: &Language,
date_str: &str,
limit: Option<TimeUnit>,
ignore_1s: bool,
) -> bool {
let pattern = TimeagoPattern::from(lang.to_owned());
match pattern.parse(date_str) {
Some(timeago) => {
let entry = timeago_table
.entries
.get_mut(lang)
.unwrap()
.get_mut(&timeago.unit)
.unwrap();
let cldr_lang_str = &lang.to_string()[0..2];
let l_id: LanguageIdentifier = cldr_lang_str.parse().unwrap();
let pl_pat = PluralRules::create(l_id, PluralRuleType::CARDINAL).unwrap();
let pl = PluralCat::from(pl_pat.select(timeago.n).unwrap());
// Collect the case if its plural type is missing
if entry.missing_plurals.remove(&pl) {
entry.cases.insert(date_str.to_owned(), timeago.n);
info!(
"Collected `{}` ({} {:?})",
date_str, timeago.n, timeago.unit
);
}
timeago_table
.entries
.get(lang)
.unwrap()
.iter()
.all(|(t, entry)| {
(limit.is_some() && t > &limit.unwrap())
|| entry.missing_plurals.is_empty()
|| (ignore_1s
&& t == &TimeUnit::Second
&& entry.missing_plurals.len() == 1
&& entry.missing_plurals.contains(&PluralCat::One))
})
}
None => {
error!("Could not parse `{}`", date_str);
let errors = timeago_table
.errors
.entry(*lang)
.or_insert_with(|| HashSet::new());
errors.insert(date_str.to_owned());
false
}
}
}
/// Page through the comments of `video_id` (at most 40 pages) and feed their
/// cleaned date strings into the timeago table.
///
/// Zero-width spaces and parenthesised parts are removed from every string
/// before insertion. Paging stops as soon as `insert_timeago_table` reports
/// completeness, the number of languages with parse errors grows, or there
/// are no more comment pages.
///
/// `latest` selects the initial continuation token (see
/// `get_comment_initial_ctoken`); `limit` and `ignore_1s` are forwarded to
/// `insert_timeago_table`.
async fn insert_timeago_table_datestrings(
    rp: &RustyTube,
    timeago_table: &mut TimeagoTable,
    video_id: &str,
    latest: bool,
    limit: Option<TimeUnit>,
    ignore_1s: bool,
) {
    let mut ctoken = get_comment_initial_ctoken(rp, video_id, latest).await;
    let brace_pattern = Regex::new(r"\(.+\)").unwrap();
    let lang = &rp.localization.language;
    let err_baseline = timeago_table.errors.len();

    for _ in 0..40 {
        let (strings, new_ctoken) = get_comment_datestrings(rp, &ctoken).await;

        let mut complete = false;
        for raw in &strings {
            // Remove zero-width space characters
            let without_zwsp = raw.replace('\u{200b}', "");
            // Remove braces
            let without_braces = brace_pattern.replace(&without_zwsp, "");
            if insert_timeago_table(
                timeago_table,
                lang,
                without_braces.trim(),
                limit,
                ignore_1s,
            ) {
                complete = true;
                break;
            }
        }

        if complete {
            break;
        }
        if timeago_table.errors.len() > err_baseline {
            return;
        }

        match new_ctoken {
            Some(next) => ctoken = next,
            None => {
                error!("end of comments");
                break;
            }
        }
    }
}
/// Fetch the video date strings of `channel_id` and insert every one of them
/// into the timeago table, without unit limit and without ignoring the
/// 1-second case. Zero-width spaces are removed and the strings are trimmed
/// before insertion.
async fn insert_timeago_table_datestrings_channel(
    rp: &RustyTube,
    timeago_table: &mut TimeagoTable,
    channel_id: &str,
) {
    let lang = &rp.localization.language;

    for raw in get_channel_datestrings(rp, channel_id).await {
        // Remove zero-width space characters
        let cleaned = raw.replace('\u{200b}', "");
        insert_timeago_table(timeago_table, lang, cleaned.trim(), None, false);
    }
}
/// Build (or continue building) the timeago table by scraping comment and
/// channel date strings for every language in the table.
///
/// Languages whose entries are already complete are skipped. For every other
/// language a sequence of fixed videos and channels is queried, each intended
/// to provide date strings of a particular age range (see the progress
/// messages). The table is written back to disk after each language.
///
/// Disabled as a test; meant to be run manually since it performs many
/// network requests.
// #[test_log::test(tokio::test)]
#[allow(dead_code)]
async fn t_build_timeago_table() {
let mut timeago_table = read_timeago_table();
// If true, a missing `One` category for seconds does not count as incomplete.
let ignore_1s = false;
// Copy the keys so the table can be mutated while iterating.
let langs = timeago_table
.entries
.keys()
.map(|k| k.to_owned())
.collect::<Vec<_>>();
for lang in langs {
// Skip languages for which every time unit is already complete.
if timeago_table
.entries
.get(&lang)
.unwrap()
.iter()
.all(|(t, entry)| {
entry.missing_plurals.is_empty()
|| (ignore_1s
&& t == &TimeUnit::Second
&& entry.missing_plurals.len() == 1
&& entry.missing_plurals.contains(&PluralCat::One))
})
{
continue;
}
let rp = RustyTube::new_with_ua(lang, Country::Us, None);
println!("{}: 1s!", lang);
{
// The same initial token is requested over and over until the seconds
// unit is complete or a parse error is recorded for a new language.
// NOTE(review): presumably this polls the newest comments to catch a
// "1 second ago" string - confirm.
let ctoken = get_comment_initial_ctoken(&rp, "gQlMMD8auMs", true).await;
// let ctoken = get_comment_initial_ctoken(&rp, "k6jqx9kZgPM", true).await;
let brace_pattern = Regex::new(r"\(.+\)").unwrap();
let lang = &rp.localization.language;
let err_baseline = timeago_table.errors.len();
// NOTE(review): this loop has no delay and no iteration limit.
loop {
let (strings, _) = get_comment_datestrings(&rp, &ctoken).await;
// NOTE(review): indexing panics if the page contains no comments.
println!("{}", strings[0]);
let res = strings
.iter()
.map(|s| {
// Remove zero-width space characters
let s = s.replace('\u{200b}', "");
// Remove braces
let s = brace_pattern.replace(&s, "");
let s = s.trim();
s.to_owned()
})
.find(|s| {
insert_timeago_table(
&mut timeago_table,
lang,
&s,
Some(TimeUnit::Second),
ignore_1s,
)
});
if res.is_some() {
break;
}
if timeago_table.errors.len() > err_baseline {
break;
}
}
}
println!("{}: 2s - n min", lang);
insert_timeago_table_datestrings(
&rp,
&mut timeago_table,
"gQlMMD8auMs",
true,
Some(TimeUnit::Minute),
ignore_1s,
)
.await;
println!("{}: x hr", lang);
insert_timeago_table_datestrings(
&rp,
&mut timeago_table,
"TohrPm3ICJE",
true,
Some(TimeUnit::Hour),
ignore_1s,
)
.await;
println!("{}: 1 hr - n day", lang);
insert_timeago_table_datestrings(
&rp,
&mut timeago_table,
"J9NQFACZYEU",
true,
Some(TimeUnit::Day),
ignore_1s,
)
.await;
println!("{}: week", lang);
insert_timeago_table_datestrings(
&rp,
&mut timeago_table,
"-zPDx6HQ_9w",
true,
Some(TimeUnit::Week),
ignore_1s,
)
.await;
println!("{}: 1 yr - n yr", lang);
insert_timeago_table_datestrings_channel(
&rp,
&mut timeago_table,
"UCEOXxzW2vU0P-0THehuIIeg",
)
.await;
println!("{}: 11 mon", lang);
insert_timeago_table_datestrings_channel(
&rp,
&mut timeago_table,
"UCY1kMZp36IQSyNx_9h4mpCg",
)
.await;
println!("{}: 13 yr", lang);
insert_timeago_table_datestrings_channel(
&rp,
&mut timeago_table,
"UCfw6qEAJMDbmgqQbuoB5moA",
)
.await;
// Persist progress after every language.
write_timeago_table(&timeago_table);
}
}

View file

@ -1,13 +1,15 @@
#![cfg(test)]
use std::{
collections::BTreeMap,
fmt::Debug,
fs::File,
io::{BufReader, BufWriter, Write}, path::Path,
io::{BufReader},
};
use crate::{model::Language, timeago::TimeUnit};
use fancy_regex::Regex;
use once_cell::sync::Lazy;
use rustypipe::{model::Language, timeago::TimeUnit};
use serde::Deserialize;
const DICT_PATH: &str = "testfiles/date/dictionary.json";
@ -19,6 +21,8 @@ type Dictionary = BTreeMap<Language, DictEntry>;
struct DictEntry {
#[serde(default)]
equivalent: Vec<Language>,
#[serde(default)]
by_char: bool,
timeago_tokens: BTreeMap<String, String>,
}
@ -49,7 +53,7 @@ fn read_dict() -> Dictionary {
}
// #[test]
fn t_gen() {
fn generate_dictionary() {
let dict = read_dict();
let code_head = r#"// This file is automatically generated. DO NOT EDIT.

View file

@ -2,13 +2,11 @@
use std::collections::BTreeMap;
use std::path::Path;
use fancy_regex::Regex;
use reqwest::Method;
use serde::{Deserialize, Serialize};
use serde_with::serde_as;
use serde_with::VecSkipError;
use crate::client::response::Icon;
use crate::client::{ClientType, ContextYT, RustyTube};
#[derive(Clone, Debug, Serialize)]
@ -76,6 +74,12 @@ struct CompactLinkRenderer {
service_endpoint: ServiceEndpoint<MenuAction>,
}
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Icon {
pub icon_type: String,
}
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ServiceEndpoint<T> {
@ -145,56 +149,129 @@ struct LanguageCountryCommand {
async fn generate_locales() {
let (languages, countries) = get_locales().await;
let mut code = "// GENERATED SECTION START //\n".to_owned();
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
use std::{fmt::Display, str::FromStr};
code.push_str("#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]\n");
code.push_str("#[serde(rename_all = \"kebab-case\")]\n");
code.push_str("pub enum Language {\n");
use serde::{Deserialize, Serialize};
"#;
let code_foot = r#"impl Display for Language {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(
&serde_json::to_string(self).map_or("".to_owned(), |s| s[1..s.len() - 1].to_owned()),
)
}
}
impl Display for Country {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(
&serde_json::to_string(self).map_or("".to_owned(), |s| s[1..s.len() - 1].to_owned()),
)
}
}
impl FromStr for Language {
type Err = serde_json::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
serde_json::from_str(&format!("\"{}\"", s))
}
}
impl FromStr for Country {
type Err = serde_json::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
serde_json::from_str(&format!("\"{}\"", s))
}
}
"#;
let mut code_langs = r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Language {
"#.to_owned();
let mut code_countries = r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "UPPERCASE")]
pub enum Country {
"#.to_owned();
let mut code_lang_array = format!("pub const LANGUAGES: [Language; {}] = [\n", languages.len());
let mut code_country_array = format!("pub const COUNTRIES: [Country; {}] = [\n", countries.len());
let mut code_lang_names = r#"impl Language {
pub fn name(&self) -> &str {
match self {
"#
.to_owned();
let mut code_country_names = r#"impl Country {
pub fn name(&self) -> &str {
match self {
"#
.to_owned();
languages.iter().for_each(|(c, n)| {
code.push_str(&format!(" /// {}\n ", n));
let enum_name = c
.split('-')
.map(|c| {
format!(
"{}{}",
c[0..1].to_owned().to_uppercase(),
c[1..].to_owned().to_lowercase()
)
})
.collect::<String>();
// Language enum
code_langs += &format!(" /// {}\n ", n);
if c.contains('-') {
code.push_str(&format!("#[serde(rename = \"{}\")]\n ", c));
code_langs += &format!("#[serde(rename = \"{}\")]\n ", c);
}
code_langs += &enum_name;
code_langs += ",\n";
c.split('-').for_each(|c| {
code.push_str(&format!(
"{}{}",
c[0..1].to_owned().to_uppercase(),
c[1..].to_owned().to_lowercase()
))
});
code.push_str(",\n");
// Language array
code_lang_array += &format!(" Language::{},\n", enum_name);
// Language names
code_lang_names += &format!(" Language::{} => \"{}\",\n", enum_name, n);
});
code.push_str("}\n\n");
code.push_str("#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]\n");
code.push_str("#[serde(rename_all = \"SCREAMING_SNAKE_CASE\")]\n");
code.push_str("pub enum Country {\n");
code_langs += "}\n";
countries.iter().for_each(|(c, n)| {
code.push_str(&format!(" /// {}\n", n));
code.push_str(&format!(
" {}{},\n",
c[0..1].to_owned().to_uppercase(),
c[1..].to_owned().to_lowercase()
))
let enum_name = c[0..1].to_owned().to_uppercase() + &c[1..].to_owned().to_lowercase();
// Country enum
code_countries += &format!(" /// {}\n", n);
code_countries += &format!(" {},\n", enum_name);
// Country array
code_country_array += &format!(" Country::{},\n", enum_name);
// Country names
code_country_names += &format!(" Country::{} => \"{}\",\n", enum_name, n);
});
code_countries += "}\n";
code.push_str("}\n");
code_lang_array += "];\n";
code_country_array += "];\n";
code_lang_names += " }\n }\n}\n";
code_country_names += " }\n }\n}\n";
code.push_str("// GENERATED SECTION END //");
let code = format!(
"{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}",
code_head,
code_langs,
code_countries,
code_lang_array,
code_country_array,
code_lang_names,
code_country_names,
code_foot,
);
let locale_path = Path::new("src/model/locale.rs");
let src = std::fs::read_to_string(locale_path).unwrap();
let delim_pattern =
Regex::new("// GENERATED SECTION START //\n[^@]*// GENERATED SECTION END //").unwrap();
let new_src = delim_pattern.replace(&src, code);
std::fs::write(locale_path, new_src.as_bytes()).unwrap();
std::fs::write(locale_path, code).unwrap();
}
async fn get_locales() -> (BTreeMap<String, String>, BTreeMap<String, String>) {

3
src/codegen/mod.rs Normal file
View file

@ -0,0 +1,3 @@
#![cfg(test)]
mod gen_dictionary;
mod gen_locales;

View file

@ -743,19 +743,19 @@ pub(crate) fn get_timeago_tokens(lang: Language) -> phf::Map<&'static str, TaTok
],
},
Language::Ja => ::phf::Map {
key: 14108922650502679131,
key: 15467950696543387533,
disps: &[
(1, 5),
(2, 0),
(0, 0),
(5, 0),
],
entries: &[
("秒前", TaToken { n: 1, unit: Some(TimeUnit::Second) }),
("年前", TaToken { n: 1, unit: Some(TimeUnit::Year) }),
("分前", TaToken { n: 1, unit: Some(TimeUnit::Minute) }),
("時間前", TaToken { n: 1, unit: Some(TimeUnit::Hour) }),
("か月前", TaToken { n: 1, unit: Some(TimeUnit::Month) }),
("日前", TaToken { n: 1, unit: Some(TimeUnit::Day) }),
("週間前", TaToken { n: 1, unit: Some(TimeUnit::Week) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Hour) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Week) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Month) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Day) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Second) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Year) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Minute) }),
],
},
Language::Ka => ::phf::Map {
@ -1607,51 +1607,51 @@ pub(crate) fn get_timeago_tokens(lang: Language) -> phf::Map<&'static str, TaTok
],
},
Language::ZhCn => ::phf::Map {
key: 14108922650502679131,
key: 2980949210194914378,
disps: &[
(1, 3),
(4, 0),
(0, 0),
(2, 1),
],
entries: &[
("钟前", TaToken { n: 1, unit: Some(TimeUnit::Minute) }),
("年前", TaToken { n: 1, unit: Some(TimeUnit::Year) }),
("个月前", TaToken { n: 1, unit: Some(TimeUnit::Month) }),
("小时前", TaToken { n: 1, unit: Some(TimeUnit::Hour) }),
("秒钟前", TaToken { n: 1, unit: Some(TimeUnit::Second) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Day) }),
("周前", TaToken { n: 1, unit: Some(TimeUnit::Week) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Minute) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Second) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Year) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Week) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Month) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Day) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Hour) }),
],
},
Language::ZhHk => ::phf::Map {
key: 12913932095322966823,
key: 15467950696543387533,
disps: &[
(0, 2),
(0, 0),
(0, 3),
(2, 0),
],
entries: &[
("年前", TaToken { n: 1, unit: Some(TimeUnit::Year) }),
("日前", TaToken { n: 1, unit: Some(TimeUnit::Day) }),
("個月前", TaToken { n: 1, unit: Some(TimeUnit::Month) }),
("鐘前", TaToken { n: 1, unit: Some(TimeUnit::Minute) }),
("星期前", TaToken { n: 1, unit: Some(TimeUnit::Week) }),
("秒前", TaToken { n: 1, unit: Some(TimeUnit::Second) }),
("小時前", TaToken { n: 1, unit: Some(TimeUnit::Hour) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Second) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Year) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Hour) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Minute) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Month) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Day) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Week) }),
],
},
Language::ZhTw => ::phf::Map {
key: 15467950696543387533,
key: 10121458955350035957,
disps: &[
(2, 1),
(0, 0),
(5, 0),
(6, 5),
],
entries: &[
("個月前", TaToken { n: 1, unit: Some(TimeUnit::Month) }),
("天前", TaToken { n: 1, unit: Some(TimeUnit::Day) }),
("小時前", TaToken { n: 1, unit: Some(TimeUnit::Hour) }),
("年前", TaToken { n: 1, unit: Some(TimeUnit::Year) }),
("秒前", TaToken { n: 1, unit: Some(TimeUnit::Second) }),
("分鐘前", TaToken { n: 1, unit: Some(TimeUnit::Minute) }),
("週前", TaToken { n: 1, unit: Some(TimeUnit::Week) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Day) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Year) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Second) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Week) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Hour) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Month) }),
("", TaToken { n: 1, unit: Some(TimeUnit::Minute) }),
],
},
Language::Zu => ::phf::Map {

View file

@ -3,11 +3,14 @@
#[macro_use]
mod macros;
#[cfg(test)]
mod codegen;
mod cache;
mod deobfuscate;
mod dictionary;
mod serializer;
mod util;
mod dictionary;
pub mod client;
pub mod download;

View file

@ -1,10 +1,10 @@
// This file is automatically generated. DO NOT EDIT.
use std::{fmt::Display, str::FromStr};
use serde::{Deserialize, Serialize};
// GENERATED SECTION START //
#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "kebab-case")]
#[serde(rename_all = "lowercase")]
pub enum Language {
/// Afrikaans
Af,
@ -185,7 +185,7 @@ pub enum Language {
}
#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
#[serde(rename_all = "UPPERCASE")]
pub enum Country {
/// United Arab Emirates
Ae,
@ -406,7 +406,410 @@ pub enum Country {
/// Zimbabwe
Zw,
}
// GENERATED SECTION END //
pub const LANGUAGES: [Language; 83] = [
Language::Af,
Language::Am,
Language::Ar,
Language::As,
Language::Az,
Language::Be,
Language::Bg,
Language::Bn,
Language::Bs,
Language::Ca,
Language::Cs,
Language::Da,
Language::De,
Language::El,
Language::En,
Language::EnGb,
Language::EnIn,
Language::Es,
Language::Es419,
Language::EsUs,
Language::Et,
Language::Eu,
Language::Fa,
Language::Fi,
Language::Fil,
Language::Fr,
Language::FrCa,
Language::Gl,
Language::Gu,
Language::Hi,
Language::Hr,
Language::Hu,
Language::Hy,
Language::Id,
Language::Is,
Language::It,
Language::Iw,
Language::Ja,
Language::Ka,
Language::Kk,
Language::Km,
Language::Kn,
Language::Ko,
Language::Ky,
Language::Lo,
Language::Lt,
Language::Lv,
Language::Mk,
Language::Ml,
Language::Mn,
Language::Mr,
Language::Ms,
Language::My,
Language::Ne,
Language::Nl,
Language::No,
Language::Or,
Language::Pa,
Language::Pl,
Language::Pt,
Language::PtPt,
Language::Ro,
Language::Ru,
Language::Si,
Language::Sk,
Language::Sl,
Language::Sq,
Language::Sr,
Language::SrLatn,
Language::Sv,
Language::Sw,
Language::Ta,
Language::Te,
Language::Th,
Language::Tr,
Language::Uk,
Language::Ur,
Language::Uz,
Language::Vi,
Language::ZhCn,
Language::ZhHk,
Language::ZhTw,
Language::Zu,
];
pub const COUNTRIES: [Country; 109] = [
Country::Ae,
Country::Ar,
Country::At,
Country::Au,
Country::Az,
Country::Ba,
Country::Bd,
Country::Be,
Country::Bg,
Country::Bh,
Country::Bo,
Country::Br,
Country::By,
Country::Ca,
Country::Ch,
Country::Cl,
Country::Co,
Country::Cr,
Country::Cy,
Country::Cz,
Country::De,
Country::Dk,
Country::Do,
Country::Dz,
Country::Ec,
Country::Ee,
Country::Eg,
Country::Es,
Country::Fi,
Country::Fr,
Country::Gb,
Country::Ge,
Country::Gh,
Country::Gr,
Country::Gt,
Country::Hk,
Country::Hn,
Country::Hr,
Country::Hu,
Country::Id,
Country::Ie,
Country::Il,
Country::In,
Country::Iq,
Country::Is,
Country::It,
Country::Jm,
Country::Jo,
Country::Jp,
Country::Ke,
Country::Kh,
Country::Kr,
Country::Kw,
Country::Kz,
Country::La,
Country::Lb,
Country::Li,
Country::Lk,
Country::Lt,
Country::Lu,
Country::Lv,
Country::Ly,
Country::Ma,
Country::Me,
Country::Mk,
Country::Mt,
Country::Mx,
Country::My,
Country::Ng,
Country::Ni,
Country::Nl,
Country::No,
Country::Np,
Country::Nz,
Country::Om,
Country::Pa,
Country::Pe,
Country::Pg,
Country::Ph,
Country::Pk,
Country::Pl,
Country::Pr,
Country::Pt,
Country::Py,
Country::Qa,
Country::Ro,
Country::Rs,
Country::Ru,
Country::Sa,
Country::Se,
Country::Sg,
Country::Si,
Country::Sk,
Country::Sn,
Country::Sv,
Country::Th,
Country::Tn,
Country::Tr,
Country::Tw,
Country::Tz,
Country::Ua,
Country::Ug,
Country::Us,
Country::Uy,
Country::Ve,
Country::Vn,
Country::Ye,
Country::Za,
Country::Zw,
];
impl Language {
pub fn name(&self) -> &str {
match self {
Language::Af => "Afrikaans",
Language::Am => "አማርኛ",
Language::Ar => "العربية",
Language::As => "অসমীয়া",
Language::Az => "Azərbaycan",
Language::Be => "Беларуская",
Language::Bg => "Български",
Language::Bn => "বাংলা",
Language::Bs => "Bosanski",
Language::Ca => "Català",
Language::Cs => "Čeština",
Language::Da => "Dansk",
Language::De => "Deutsch",
Language::El => "Ελληνικά",
Language::En => "English (US)",
Language::EnGb => "English (UK)",
Language::EnIn => "English (India)",
Language::Es => "Español (España)",
Language::Es419 => "Español (Latinoamérica)",
Language::EsUs => "Español (US)",
Language::Et => "Eesti",
Language::Eu => "Euskara",
Language::Fa => "فارسی",
Language::Fi => "Suomi",
Language::Fil => "Filipino",
Language::Fr => "Français",
Language::FrCa => "Français (Canada)",
Language::Gl => "Galego",
Language::Gu => "ગુજરાતી",
Language::Hi => "हिन्दी",
Language::Hr => "Hrvatski",
Language::Hu => "Magyar",
Language::Hy => "Հայերեն",
Language::Id => "Bahasa Indonesia",
Language::Is => "Íslenska",
Language::It => "Italiano",
Language::Iw => "עברית",
Language::Ja => "日本語",
Language::Ka => "ქართული",
Language::Kk => "Қазақ Тілі",
Language::Km => "ខ្មែរ",
Language::Kn => "ಕನ್ನಡ",
Language::Ko => "한국어",
Language::Ky => "Кыргызча",
Language::Lo => "ລາວ",
Language::Lt => "Lietuvių",
Language::Lv => "Latviešu valoda",
Language::Mk => "Македонски",
Language::Ml => "മലയാളം",
Language::Mn => "Монгол",
Language::Mr => "मराठी",
Language::Ms => "Bahasa Malaysia",
Language::My => "ဗမာ",
Language::Ne => "नेपाली",
Language::Nl => "Nederlands",
Language::No => "Norsk",
Language::Or => "ଓଡ଼ିଆ",
Language::Pa => "ਪੰਜਾਬੀ",
Language::Pl => "Polski",
Language::Pt => "Português (Brasil)",
Language::PtPt => "Português",
Language::Ro => "Română",
Language::Ru => "Русский",
Language::Si => "සිංහල",
Language::Sk => "Slovenčina",
Language::Sl => "Slovenščina",
Language::Sq => "Shqip",
Language::Sr => "Српски",
Language::SrLatn => "Srpski",
Language::Sv => "Svenska",
Language::Sw => "Kiswahili",
Language::Ta => "தமிழ்",
Language::Te => "తెలుగు",
Language::Th => "ภาษาไทย",
Language::Tr => "Türkçe",
Language::Uk => "Українська",
Language::Ur => "اردو",
Language::Uz => "Ozbek",
Language::Vi => "Tiếng Việt",
Language::ZhCn => "中文 (简体)",
Language::ZhHk => "中文 (香港)",
Language::ZhTw => "中文 (繁體)",
Language::Zu => "IsiZulu",
}
}
}
impl Country {
pub fn name(&self) -> &str {
match self {
Country::Ae => "United Arab Emirates",
Country::Ar => "Argentina",
Country::At => "Austria",
Country::Au => "Australia",
Country::Az => "Azerbaijan",
Country::Ba => "Bosnia and Herzegovina",
Country::Bd => "Bangladesh",
Country::Be => "Belgium",
Country::Bg => "Bulgaria",
Country::Bh => "Bahrain",
Country::Bo => "Bolivia",
Country::Br => "Brazil",
Country::By => "Belarus",
Country::Ca => "Canada",
Country::Ch => "Switzerland",
Country::Cl => "Chile",
Country::Co => "Colombia",
Country::Cr => "Costa Rica",
Country::Cy => "Cyprus",
Country::Cz => "Czechia",
Country::De => "Germany",
Country::Dk => "Denmark",
Country::Do => "Dominican Republic",
Country::Dz => "Algeria",
Country::Ec => "Ecuador",
Country::Ee => "Estonia",
Country::Eg => "Egypt",
Country::Es => "Spain",
Country::Fi => "Finland",
Country::Fr => "France",
Country::Gb => "United Kingdom",
Country::Ge => "Georgia",
Country::Gh => "Ghana",
Country::Gr => "Greece",
Country::Gt => "Guatemala",
Country::Hk => "Hong Kong",
Country::Hn => "Honduras",
Country::Hr => "Croatia",
Country::Hu => "Hungary",
Country::Id => "Indonesia",
Country::Ie => "Ireland",
Country::Il => "Israel",
Country::In => "India",
Country::Iq => "Iraq",
Country::Is => "Iceland",
Country::It => "Italy",
Country::Jm => "Jamaica",
Country::Jo => "Jordan",
Country::Jp => "Japan",
Country::Ke => "Kenya",
Country::Kh => "Cambodia",
Country::Kr => "South Korea",
Country::Kw => "Kuwait",
Country::Kz => "Kazakhstan",
Country::La => "Laos",
Country::Lb => "Lebanon",
Country::Li => "Liechtenstein",
Country::Lk => "Sri Lanka",
Country::Lt => "Lithuania",
Country::Lu => "Luxembourg",
Country::Lv => "Latvia",
Country::Ly => "Libya",
Country::Ma => "Morocco",
Country::Me => "Montenegro",
Country::Mk => "North Macedonia",
Country::Mt => "Malta",
Country::Mx => "Mexico",
Country::My => "Malaysia",
Country::Ng => "Nigeria",
Country::Ni => "Nicaragua",
Country::Nl => "Netherlands",
Country::No => "Norway",
Country::Np => "Nepal",
Country::Nz => "New Zealand",
Country::Om => "Oman",
Country::Pa => "Panama",
Country::Pe => "Peru",
Country::Pg => "Papua New Guinea",
Country::Ph => "Philippines",
Country::Pk => "Pakistan",
Country::Pl => "Poland",
Country::Pr => "Puerto Rico",
Country::Pt => "Portugal",
Country::Py => "Paraguay",
Country::Qa => "Qatar",
Country::Ro => "Romania",
Country::Rs => "Serbia",
Country::Ru => "Russia",
Country::Sa => "Saudi Arabia",
Country::Se => "Sweden",
Country::Sg => "Singapore",
Country::Si => "Slovenia",
Country::Sk => "Slovakia",
Country::Sn => "Senegal",
Country::Sv => "El Salvador",
Country::Th => "Thailand",
Country::Tn => "Tunisia",
Country::Tr => "Turkey",
Country::Tw => "Taiwan",
Country::Tz => "Tanzania",
Country::Ua => "Ukraine",
Country::Ug => "Uganda",
Country::Us => "United States",
Country::Uy => "Uruguay",
Country::Ve => "Venezuela",
Country::Vn => "Vietnam",
Country::Ye => "Yemen",
Country::Za => "South Africa",
Country::Zw => "Zimbabwe",
}
}
}
impl Display for Language {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@ -426,7 +829,6 @@ impl Display for Country {
impl FromStr for Language {
type Err = serde_json::Error;
/// Parses a `Language` from the string form accepted by its serde
/// `Deserialize` implementation.
///
/// # Errors
///
/// Returns a `serde_json::Error` if `s` is not accepted by that implementation.
fn from_str(s: &str) -> Result<Self, Self::Err> {
    // Hand the input over as an already-built JSON string value instead of
    // splicing it between quotes and re-parsing: that way quotes, backslashes
    // and escape sequences inside `s` are matched literally rather than being
    // interpreted as JSON syntax (e.g. `\u0065n` is no longer decoded first).
    serde_json::from_value(serde_json::Value::String(s.to_owned()))
}
@ -434,7 +836,6 @@ impl FromStr for Language {
impl FromStr for Country {
type Err = serde_json::Error;
/// Parses a `Country` from the string form accepted by its serde
/// `Deserialize` implementation.
///
/// # Errors
///
/// Returns a `serde_json::Error` if `s` is not accepted by that implementation.
fn from_str(s: &str) -> Result<Self, Self::Err> {
    // Hand the input over as an already-built JSON string value instead of
    // splicing it between quotes and re-parsing: that way quotes, backslashes
    // and escape sequences inside `s` are matched literally rather than being
    // interpreted as JSON syntax (e.g. `\u0055S` is no longer decoded first).
    serde_json::from_value(serde_json::Value::String(s.to_owned()))
}

View file

@ -4,92 +4,6 @@ use serde::{Deserialize, Serialize};
use crate::{dictionary, model::Language, util};
/// Fixed table of 83 [`Language`] variants, sorted alphabetically by variant name.
///
/// NOTE(review): presumably this lists every variant of `Language`; confirm
/// against the enum definition, and keep the array length in sync when a
/// variant is added or removed.
pub const LANGUAGES: [Language; 83] = [
Language::Af,
Language::Am,
Language::Ar,
Language::As,
Language::Az,
Language::Be,
Language::Bg,
Language::Bn,
Language::Bs,
Language::Ca,
Language::Cs,
Language::Da,
Language::De,
Language::El,
Language::En,
Language::EnGb,
Language::EnIn,
Language::Es,
Language::Es419,
Language::EsUs,
Language::Et,
Language::Eu,
Language::Fa,
Language::Fi,
Language::Fil,
Language::Fr,
Language::FrCa,
Language::Gl,
Language::Gu,
Language::Hi,
Language::Hr,
Language::Hu,
Language::Hy,
Language::Id,
Language::Is,
Language::It,
Language::Iw,
Language::Ja,
Language::Ka,
Language::Kk,
Language::Km,
Language::Kn,
Language::Ko,
Language::Ky,
Language::Lo,
Language::Lt,
Language::Lv,
Language::Mk,
Language::Ml,
Language::Mn,
Language::Mr,
Language::Ms,
Language::My,
Language::Ne,
Language::Nl,
Language::No,
Language::Or,
Language::Pa,
Language::Pl,
Language::Pt,
Language::PtPt,
Language::Ro,
Language::Ru,
Language::Si,
Language::Sk,
Language::Sl,
Language::Sq,
Language::Sr,
Language::SrLatn,
Language::Sv,
Language::Sw,
Language::Ta,
Language::Te,
Language::Th,
Language::Tr,
Language::Uk,
Language::Ur,
Language::Uz,
Language::Vi,
Language::ZhCn,
Language::ZhHk,
Language::ZhTw,
Language::Zu,
];
#[derive(Debug, Copy, Clone, Serialize, Deserialize, Eq)]
pub struct TimeAgo {
pub n: u8,
@ -162,18 +76,35 @@ pub fn parse(lang: Language, textual_date: &str) -> Option<TimeAgo> {
.collect::<String>();
let mut qu: u8 = util::parse_numeric(&textual_date).unwrap_or(1);
filtered_str.split(' ').find_map(|word| {
mappings
.get(word)
.map(|t| match t.unit {
Some(unit) => Some(TimeAgo { n: t.n * qu, unit }),
None => {
qu = t.n;
None
}
match lang {
Language::Ja | Language::ZhCn | Language::ZhHk | Language::ZhTw => {
filtered_str.chars().find_map(|word| {
mappings
.get(&word.to_string())
.map(|t| match t.unit {
Some(unit) => Some(TimeAgo { n: t.n * qu, unit }),
None => {
qu = t.n;
None
}
})
.flatten()
})
.flatten()
})
}
_ => filtered_str.split(' ').find_map(|word| {
mappings
.get(word)
.map(|t| match t.unit {
Some(unit) => Some(TimeAgo { n: t.n * qu, unit }),
None => {
qu = t.n;
None
}
})
.flatten()
}),
}
}
#[cfg(test)]

View file

@ -548,14 +548,15 @@
}
},
"ja": {
"by_char": true,
"timeago_tokens": {
"か月前": "M",
"分": "m",
"年": "Y",
"日": "D",
"時間前": "h",
"秒": "s",
"週間前": "W"
"か": "M",
"分": "m",
"年": "Y",
"日": "D",
"時": "h",
"秒": "s",
"週": "W"
}
},
"ka": {
@ -1180,36 +1181,39 @@
}
},
"zh-CN": {
"by_char": true,
"timeago_tokens": {
"个月前": "M",
"分钟前": "m",
"周": "W",
"天": "D",
"小时前": "h",
"年": "Y",
"秒钟前": "s"
"个": "M",
"分": "m",
"周": "W",
"天": "D",
"小": "h",
"年": "Y",
"秒": "s"
}
},
"zh-HK": {
"by_char": true,
"timeago_tokens": {
"個月前": "M",
"分鐘前": "m",
"小時前": "h",
"年": "Y",
"日": "D",
"星期前": "W",
"秒": "s"
"個": "M",
"分": "m",
"小": "h",
"年": "Y",
"日": "D",
"星": "W",
"秒": "s"
}
},
"zh-TW": {
"by_char": true,
"timeago_tokens": {
"個月前": "M",
"分鐘前": "m",
"天": "D",
"小時前": "h",
"年": "Y",
"秒": "s",
"週": "W"
"個": "M",
"分": "m",
"天": "D",
"小": "h",
"年": "Y",
"秒": "s",
"週": "W"
}
},
"zu": {