From 513bf1dc9c78dbfe9cf7c8539e983dc49ff7ae58 Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Mon, 5 Sep 2022 21:43:43 +0200 Subject: [PATCH] finish timeago parser, refactor codegen --- notes/video_ids.txt | 10 +- src/client/mod.rs | 5 +- src/client/scripts/mod.rs | 3 - src/client/scripts/timeago_testfiles.rs | 671 ------------------ {tests => src/codegen}/gen_dictionary.rs | 10 +- .../gen_locales.rs} | 151 +++- src/codegen/mod.rs | 3 + src/dictionary.rs | 80 +-- src/lib.rs | 5 +- src/model/locale.rs | 413 ++++++++++- src/timeago.rs | 125 +--- testfiles/date/dictionary.json | 60 +- 12 files changed, 641 insertions(+), 895 deletions(-) delete mode 100644 src/client/scripts/mod.rs delete mode 100644 src/client/scripts/timeago_testfiles.rs rename {tests => src/codegen}/gen_dictionary.rs (95%) rename src/{client/scripts/language_menu.rs => codegen/gen_locales.rs} (61%) create mode 100644 src/codegen/mod.rs diff --git a/notes/video_ids.txt b/notes/video_ids.txt index 0332f56..3859881 100644 --- a/notes/video_ids.txt +++ b/notes/video_ids.txt @@ -29,12 +29,12 @@ Throttling issue: Y8JFxS1HlDo 4.657 Songs: PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX 186 Songs: PLbZIPy20-1pN7mqjckepWF78ndb6ci_qi -Playlist update dates: +Playlist update dates (05.09.2022): today: RDCLAK5uy_kj3rhiar1LINmyDcuFnXihEO0K1NQa2jI -yesterday: PL3-sRm8xAzY9sDilvaWjCwCI0TkUzYdOG -2 days ago: PL3qHjxSSl7AER3rxfEr4SiHNr-ihbQyqU -3 days ago: PLHr0jWPfopte182N54r1ra7tkRJC1fmPu -5 days ago: PLF7B92F492FDAE703 +yesterday: PL4C44E2875308A280 +2 days ago: PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am +5 days ago: PL3-sRm8xAzY9sDilvaWjCwCI0TkUzYdOG +7 days ago: PLHr0jWPfopte182N54r1ra7tkRJC1fmPu Jan PL1J-6JOckZtHxTA3hN5SK7gBQaFfKzeXr 01.01.2016 Feb PL1J-6JOckZtETrbzwZE7mRIIK6BzWNLAs 07.02.2016 diff --git a/src/client/mod.rs b/src/client/mod.rs index 7770147..32c7f9e 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -4,9 +4,6 @@ pub mod video; mod response; -#[cfg(test)] -mod scripts; - use std::sync::Arc; use anyhow::{anyhow, Context, Result}; @@ -190,7 +187,7 @@ impl RustyTube { } } - fn get_ytclient(&self, client_type: ClientType) -> Arc { + pub(crate) fn get_ytclient(&self, client_type: ClientType) -> Arc { match client_type { ClientType::Desktop => self.desktop_client.clone(), ClientType::DesktopMusic => self.desktop_music_client.clone(), diff --git a/src/client/scripts/mod.rs b/src/client/scripts/mod.rs deleted file mode 100644 index 53b824d..0000000 --- a/src/client/scripts/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -#![cfg(test)] -mod language_menu; -mod timeago_testfiles; diff --git a/src/client/scripts/timeago_testfiles.rs b/src/client/scripts/timeago_testfiles.rs deleted file mode 100644 index 80036a7..0000000 --- a/src/client/scripts/timeago_testfiles.rs +++ /dev/null @@ -1,671 +0,0 @@ -#![cfg(test)] - -use std::{ - collections::{BTreeMap, HashSet}, - fs::File, - io::BufReader, - path::Path, -}; - -use fancy_regex::Regex; -use futures::{stream, StreamExt}; -use intl_pluralrules::{PluralCategory, PluralRuleType, PluralRules}; -use log::{error, info}; -use once_cell::sync::Lazy; -use reqwest::Method; -use serde::{Deserialize, Serialize}; -use unic_langid::LanguageIdentifier; - -use crate::{ - client::{response, ClientType, ContextYT, RustyTube}, - model::{Country, Language}, - timeago::{self, TimeUnit, TimeagoPattern, LANGUAGES}, -}; - -#[derive(Clone, Debug, Serialize)] -#[serde(rename_all = "camelCase")] -struct QChannel { - context: ContextYT, - browse_id: String, - params: String, -} - -async fn get_channel_datestrings(rp: &RustyTube, channel_id: &str) -> Vec { - let client = rp.get_ytclient(ClientType::Desktop); - let context = client.get_context(true).await; - - let request_body = QChannel { - context, - browse_id: channel_id.to_owned(), - params: "EgZ2aWRlb3PyBgQKAjoA".to_owned(), - }; - - let resp = client - .request_builder(Method::POST, "browse") - .await - .json(&request_body) - .send() - .await - .unwrap() - .error_for_status() - .unwrap(); - - let channel_response = resp.json::().await.unwrap(); - - channel_response - .contents - .two_column_browse_results_renderer - .tabs[0] - .tab_renderer - .content - .section_list_renderer - .contents[0] - .item_section_renderer - .contents[0] - .grid_renderer - .items - .iter() - .filter_map(|itm| match itm { - response::VideoListItem::GridVideoRenderer { video } => { - video.published_time_text.to_owned() - } - response::VideoListItem::ContinuationItemRenderer { .. } => None, - }) - .collect::>() -} - -async fn get_comment_initial_ctoken(rp: &RustyTube, video_id: &str, latest: bool) -> String { - let video_response = rp.get_video_response(video_id).await.unwrap(); - - match latest { - true => video_response - .engagement_panels - .iter() - .find_map(|p| { - p.engagement_panel_section_list_renderer - .header - .engagement_panel_title_header_renderer - .menu - .sort_filter_sub_menu_renderer - .sub_menu_items - .get(1) - .map(|i| i.service_endpoint.continuation_command.token.to_owned()) - }) - .unwrap(), - false => video_response - .contents - .two_column_watch_next_results - .results - .results - .contents - .iter() - .find_map(|c| match c { - response::video::VideoResultsItem::ItemSectionRenderer { - contents, - section_identifier, - } => match section_identifier == "comment-item-section" { - true => match &contents[0] { - response::video::ItemSection::ContinuationItemRenderer { - continuation_endpoint, - } => Some(continuation_endpoint.continuation_command.token.to_owned()), - _ => None, - }, - false => None, - }, - _ => None, - }) - .unwrap(), - } -} - -async fn get_comment_datestrings(rp: &RustyTube, ctoken: &str) -> (Vec, Option) { - let comments_response = rp.get_comments_response(ctoken).await.unwrap(); - - let mut next_ctoken: Option = None; - let datestrings = comments_response - .on_response_received_endpoints - /* - .iter() - .find(|e| { - !e.append_continuation_items_action - .continuation_items - .is_empty() - && matches!( - &e.append_continuation_items_action.continuation_items[0], - CommentListItem::CommentsHeaderRenderer { count_text } - ) - }) - .unwrap() - */ - .iter() - .rev() - .next() - .unwrap() - .append_continuation_items_action - .continuation_items - .iter() - .filter_map(|itm| match itm { - response::video::CommentListItem::CommentThreadRenderer { comment, .. } => { - Some(comment.comment_renderer.published_time_text.to_owned()) - } - response::video::CommentListItem::ContinuationItemRenderer { - continuation_endpoint, - } => { - next_ctoken = Some(continuation_endpoint.continuation_command.token.to_owned()); - None - } - _ => None, - }) - .collect::>(); - - (datestrings, next_ctoken) -} - -// #[test_log::test(tokio::test)] -#[allow(dead_code)] -async fn download_timeago_testfiles() { - let json_path = Path::new("testfiles/date/timeago_samples.json").to_path_buf(); - if json_path.exists() { - return; - } - - let channel_ids = [ - "UCeY0bbntWzzVIaj2z3QigXg", - "UCcmpeVbSSQlZRvHfdC-CRwg", - "UC65afEgL62PGFWXY7n6CUbA", - "UCEOXxzW2vU0P-0THehuIIeg", - ]; - - // Get strings of all languages - let mut lang_strings: BTreeMap> = BTreeMap::new(); - for lang in timeago::LANGUAGES { - let rp = RustyTube::new_with_ua(lang, Country::Us, None); - let strings = stream::iter(channel_ids) - .map(|id| get_channel_datestrings(&rp, id)) - .buffered(4) - .collect::>() - .await - .into_iter() - .flatten() - .collect::>(); - - lang_strings.insert(lang, strings); - } - - let mut en_strings_uniq: HashSet<&str> = HashSet::new(); - let mut uniq_ids: HashSet = HashSet::new(); - - lang_strings[&Language::En] - .iter() - .enumerate() - .for_each(|(n, s)| { - if en_strings_uniq.insert(s) { - uniq_ids.insert(n); - } - }); - - let strings_map = lang_strings - .iter() - .map(|(lang, strings)| { - ( - lang, - strings - .iter() - .enumerate() - .filter(|(n, _)| uniq_ids.contains(n)) - .map(|(_, s)| s) - .collect::>(), - ) - }) - .collect::>(); - - let file = File::create(json_path).unwrap(); - serde_json::to_writer_pretty(file, &strings_map).unwrap(); -} - -#[derive(Debug, Clone, Deserialize)] -struct PluralRulesData { - supplemental: PluralRulesInner, -} - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "kebab-case")] -struct PluralRulesInner { - plurals_type_cardinal: BTreeMap, -} - -#[derive(Debug, Clone, Deserialize)] -struct Ruleset { - #[serde(rename = "pluralRule-count-one")] - one: Option, - #[serde(rename = "pluralRule-count-two")] - two: Option, - #[serde(rename = "pluralRule-count-few")] - few: Option, - #[serde(rename = "pluralRule-count-many")] - many: Option, - #[serde(rename = "pluralRule-count-other")] - other: Option, -} - -#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] -enum PluralCat { - Zero, - One, - Two, - Few, - Many, - Other, -} - -impl From for PluralCat { - fn from(value: PluralCategory) -> Self { - match value { - PluralCategory::ZERO => Self::Zero, - PluralCategory::ONE => Self::One, - PluralCategory::TWO => Self::Two, - PluralCategory::FEW => Self::Few, - PluralCategory::MANY => Self::Many, - PluralCategory::OTHER => Self::Other, - } - } -} - -static PLURAL_RULES: Lazy>> = Lazy::new(|| { - let json_path = Path::new("testfiles/date/cldr_pluralrules_cardinals.json"); - let json_file = File::open(json_path).unwrap(); - - serde_json::from_reader::<_, PluralRulesData>(BufReader::new(json_file)) - .unwrap() - .supplemental - .plurals_type_cardinal - .iter() - .map(|(lang, rules)| { - let mut hs: HashSet = HashSet::new(); - - if rules.one.is_some() { - hs.insert(PluralCat::One); - } - if rules.two.is_some() { - hs.insert(PluralCat::Two); - } - if rules.few.is_some() { - hs.insert(PluralCat::Few); - } - if rules.many.is_some() { - hs.insert(PluralCat::Many); - } - if rules.other.is_some() { - hs.insert(PluralCat::Other); - } - - (lang.to_owned(), hs) - }) - .collect::>() -}); - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct TimeagoTable { - entries: BTreeMap>, - errors: BTreeMap>, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct TimeagoTableEntry { - cases: BTreeMap, - missing_plurals: HashSet, -} - -const TIME_UNITS: [TimeUnit; 7] = [ - TimeUnit::Second, - TimeUnit::Minute, - TimeUnit::Hour, - TimeUnit::Day, - TimeUnit::Week, - TimeUnit::Month, - TimeUnit::Year, -]; - -fn new_timeago_table() -> TimeagoTable { - TimeagoTable { - entries: LANGUAGES - .iter() - .filter_map(|lang| { - // Check if language is redundant - match lang { - Language::EnGb - | Language::EnIn - | Language::FrCa - | Language::EsUs - | Language::Es419 => None, - _ => { - let cldr_lang_str = match lang { - Language::SrLatn => "sr".to_owned(), - Language::ZhCn | Language::ZhHk | Language::ZhTw => "zh".to_owned(), - _ => lang.to_string(), - }; - - let m = TIME_UNITS - .iter() - .map(|t| { - let missing_plurals = if t == &TimeUnit::Week { - // Week only has 3 valid values (2-4) - let mut mp = HashSet::new(); - - let l_id = cldr_lang_str.parse::().unwrap(); - let pr = PluralRules::create(l_id, PluralRuleType::CARDINAL) - .unwrap(); - - mp.insert(PluralCat::from(pr.select(2).unwrap())); - mp.insert(PluralCat::from(pr.select(3).unwrap())); - mp.insert(PluralCat::from(pr.select(4).unwrap())); - - mp - } else { - PLURAL_RULES.get(&cldr_lang_str).unwrap().clone() - }; - - ( - t.to_owned(), - TimeagoTableEntry { - cases: BTreeMap::new(), - missing_plurals, - }, - ) - }) - .collect(); - - Some((lang.to_owned(), m)) - } - } - }) - .collect(), - errors: BTreeMap::new(), - } -} - -fn read_timeago_table() -> TimeagoTable { - let json_path = Path::new("testfiles/date/timeago_table.json").to_path_buf(); - if json_path.exists() { - let file = File::open(json_path).unwrap(); - serde_json::from_reader(BufReader::new(file)).unwrap() - } else { - new_timeago_table() - } -} - -fn write_timeago_table(timeago_table: &TimeagoTable) { - let json_path = Path::new("testfiles/date/timeago_table.json").to_path_buf(); - let file = File::create(json_path).unwrap(); - serde_json::to_writer_pretty(file, timeago_table).unwrap(); -} - -fn insert_timeago_table( - timeago_table: &mut TimeagoTable, - lang: &Language, - date_str: &str, - limit: Option, - ignore_1s: bool, -) -> bool { - let pattern = TimeagoPattern::from(lang.to_owned()); - match pattern.parse(date_str) { - Some(timeago) => { - let entry = timeago_table - .entries - .get_mut(lang) - .unwrap() - .get_mut(&timeago.unit) - .unwrap(); - - let cldr_lang_str = &lang.to_string()[0..2]; - let l_id: LanguageIdentifier = cldr_lang_str.parse().unwrap(); - let pl_pat = PluralRules::create(l_id, PluralRuleType::CARDINAL).unwrap(); - let pl = PluralCat::from(pl_pat.select(timeago.n).unwrap()); - - // Collect the case if its plural type is missing - if entry.missing_plurals.remove(&pl) { - entry.cases.insert(date_str.to_owned(), timeago.n); - info!( - "Collected `{}` ({} {:?})", - date_str, timeago.n, timeago.unit - ); - } - - timeago_table - .entries - .get(lang) - .unwrap() - .iter() - .all(|(t, entry)| { - (limit.is_some() && t > &limit.unwrap()) - || entry.missing_plurals.is_empty() - || (ignore_1s - && t == &TimeUnit::Second - && entry.missing_plurals.len() == 1 - && entry.missing_plurals.contains(&PluralCat::One)) - }) - } - None => { - error!("Could not parse `{}`", date_str); - let errors = timeago_table - .errors - .entry(*lang) - .or_insert_with(|| HashSet::new()); - errors.insert(date_str.to_owned()); - false - } - } -} - -async fn insert_timeago_table_datestrings( - rp: &RustyTube, - timeago_table: &mut TimeagoTable, - video_id: &str, - latest: bool, - limit: Option, - ignore_1s: bool, -) { - let mut ctoken = get_comment_initial_ctoken(&rp, video_id, latest).await; - let brace_pattern = Regex::new(r"\(.+\)").unwrap(); - let lang = &rp.localization.language; - let err_baseline = timeago_table.errors.len(); - - for _ in 0..40 { - let (strings, new_ctoken) = get_comment_datestrings(&rp, &ctoken).await; - - let res = strings - .iter() - .map(|s| { - // Remove zero-width space characters - let s = s.replace('\u{200b}', ""); - - // Remove braces - let s = brace_pattern.replace(&s, ""); - - let s = s.trim(); - s.to_owned() - }) - .find(|s| insert_timeago_table(timeago_table, lang, &s, limit, ignore_1s)); - - if res.is_some() { - break; - } - - if timeago_table.errors.len() > err_baseline { - return; - } - - if let Some(new_ctoken) = new_ctoken { - ctoken = new_ctoken.to_owned(); - } else { - error!("end of comments"); - break; - } - } -} - -async fn insert_timeago_table_datestrings_channel( - rp: &RustyTube, - timeago_table: &mut TimeagoTable, - channel_id: &str, -) { - let lang = &rp.localization.language; - - let strings = get_channel_datestrings(rp, channel_id).await; - - strings - .iter() - .map(|s| { - // Remove zero-width space characters - let s = s.replace('\u{200b}', ""); - - let s = s.trim(); - s.to_owned() - }) - .for_each(|s| { - insert_timeago_table(timeago_table, lang, &s, None, false); - }); -} - -// #[test_log::test(tokio::test)] -#[allow(dead_code)] -async fn t_build_timeago_table() { - let mut timeago_table = read_timeago_table(); - let ignore_1s = false; - let langs = timeago_table - .entries - .keys() - .map(|k| k.to_owned()) - .collect::>(); - - for lang in langs { - if timeago_table - .entries - .get(&lang) - .unwrap() - .iter() - .all(|(t, entry)| { - entry.missing_plurals.is_empty() - || (ignore_1s - && t == &TimeUnit::Second - && entry.missing_plurals.len() == 1 - && entry.missing_plurals.contains(&PluralCat::One)) - }) - { - continue; - } - - let rp = RustyTube::new_with_ua(lang, Country::Us, None); - - println!("{}: 1s!", lang); - { - let ctoken = get_comment_initial_ctoken(&rp, "gQlMMD8auMs", true).await; - // let ctoken = get_comment_initial_ctoken(&rp, "k6jqx9kZgPM", true).await; - let brace_pattern = Regex::new(r"\(.+\)").unwrap(); - let lang = &rp.localization.language; - let err_baseline = timeago_table.errors.len(); - - loop { - let (strings, _) = get_comment_datestrings(&rp, &ctoken).await; - println!("{}", strings[0]); - - let res = strings - .iter() - .map(|s| { - // Remove zero-width space characters - let s = s.replace('\u{200b}', ""); - - // Remove braces - let s = brace_pattern.replace(&s, ""); - - let s = s.trim(); - s.to_owned() - }) - .find(|s| { - insert_timeago_table( - &mut timeago_table, - lang, - &s, - Some(TimeUnit::Second), - ignore_1s, - ) - }); - - if res.is_some() { - break; - } - - if timeago_table.errors.len() > err_baseline { - break; - } - } - } - - println!("{}: 2s - n min", lang); - insert_timeago_table_datestrings( - &rp, - &mut timeago_table, - "gQlMMD8auMs", - true, - Some(TimeUnit::Minute), - ignore_1s, - ) - .await; - println!("{}: x hr", lang); - insert_timeago_table_datestrings( - &rp, - &mut timeago_table, - "TohrPm3ICJE", - true, - Some(TimeUnit::Hour), - ignore_1s, - ) - .await; - println!("{}: 1 hr - n day", lang); - insert_timeago_table_datestrings( - &rp, - &mut timeago_table, - "J9NQFACZYEU", - true, - Some(TimeUnit::Day), - ignore_1s, - ) - .await; - println!("{}: week", lang); - insert_timeago_table_datestrings( - &rp, - &mut timeago_table, - "-zPDx6HQ_9w", - true, - Some(TimeUnit::Week), - ignore_1s, - ) - .await; - - println!("{}: 1 yr - n yr", lang); - insert_timeago_table_datestrings_channel( - &rp, - &mut timeago_table, - "UCEOXxzW2vU0P-0THehuIIeg", - ) - .await; - - println!("{}: 11 mon", lang); - insert_timeago_table_datestrings_channel( - &rp, - &mut timeago_table, - "UCY1kMZp36IQSyNx_9h4mpCg", - ) - .await; - - println!("{}: 13 yr", lang); - insert_timeago_table_datestrings_channel( - &rp, - &mut timeago_table, - "UCfw6qEAJMDbmgqQbuoB5moA", - ) - .await; - - write_timeago_table(&timeago_table); - } -} diff --git a/tests/gen_dictionary.rs b/src/codegen/gen_dictionary.rs similarity index 95% rename from tests/gen_dictionary.rs rename to src/codegen/gen_dictionary.rs index fbb38d8..5a0154d 100644 --- a/tests/gen_dictionary.rs +++ b/src/codegen/gen_dictionary.rs @@ -1,13 +1,15 @@ +#![cfg(test)] + use std::{ collections::BTreeMap, fmt::Debug, fs::File, - io::{BufReader, BufWriter, Write}, path::Path, + io::{BufReader}, }; +use crate::{model::Language, timeago::TimeUnit}; use fancy_regex::Regex; use once_cell::sync::Lazy; -use rustypipe::{model::Language, timeago::TimeUnit}; use serde::Deserialize; const DICT_PATH: &str = "testfiles/date/dictionary.json"; @@ -19,6 +21,8 @@ type Dictionary = BTreeMap; struct DictEntry { #[serde(default)] equivalent: Vec, + #[serde(default)] + by_char: bool, timeago_tokens: BTreeMap, } @@ -49,7 +53,7 @@ fn read_dict() -> Dictionary { } // #[test] -fn t_gen() { +fn generate_dictionary() { let dict = read_dict(); let code_head = r#"// This file is automatically generated. DO NOT EDIT. diff --git a/src/client/scripts/language_menu.rs b/src/codegen/gen_locales.rs similarity index 61% rename from src/client/scripts/language_menu.rs rename to src/codegen/gen_locales.rs index c8390cc..1225a05 100644 --- a/src/client/scripts/language_menu.rs +++ b/src/codegen/gen_locales.rs @@ -2,13 +2,11 @@ use std::collections::BTreeMap; use std::path::Path; -use fancy_regex::Regex; use reqwest::Method; use serde::{Deserialize, Serialize}; use serde_with::serde_as; use serde_with::VecSkipError; -use crate::client::response::Icon; use crate::client::{ClientType, ContextYT, RustyTube}; #[derive(Clone, Debug, Serialize)] @@ -76,6 +74,12 @@ struct CompactLinkRenderer { service_endpoint: ServiceEndpoint, } +#[derive(Clone, Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Icon { + pub icon_type: String, +} + #[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] struct ServiceEndpoint { @@ -145,56 +149,129 @@ struct LanguageCountryCommand { async fn generate_locales() { let (languages, countries) = get_locales().await; - let mut code = "// GENERATED SECTION START //\n".to_owned(); + let code_head = r#"// This file is automatically generated. DO NOT EDIT. +use std::{fmt::Display, str::FromStr}; - code.push_str("#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]\n"); - code.push_str("#[serde(rename_all = \"kebab-case\")]\n"); - code.push_str("pub enum Language {\n"); +use serde::{Deserialize, Serialize}; +"#; + + let code_foot = r#"impl Display for Language { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str( + &serde_json::to_string(self).map_or("".to_owned(), |s| s[1..s.len() - 1].to_owned()), + ) + } +} + +impl Display for Country { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str( + &serde_json::to_string(self).map_or("".to_owned(), |s| s[1..s.len() - 1].to_owned()), + ) + } +} + +impl FromStr for Language { + type Err = serde_json::Error; + fn from_str(s: &str) -> Result { + serde_json::from_str(&format!("\"{}\"", s)) + } +} + +impl FromStr for Country { + type Err = serde_json::Error; + fn from_str(s: &str) -> Result { + serde_json::from_str(&format!("\"{}\"", s)) + } +} +"#; + + let mut code_langs = r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[serde(rename_all = "lowercase")] +pub enum Language { +"#.to_owned(); + + let mut code_countries = r#"#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[serde(rename_all = "UPPERCASE")] +pub enum Country { +"#.to_owned(); + + let mut code_lang_array = format!("pub const LANGUAGES: [Language; {}] = [\n", languages.len()); + let mut code_country_array = format!("pub const COUNTRIES: [Country; {}] = [\n", countries.len()); + + let mut code_lang_names = r#"impl Language { + pub fn name(&self) -> &str { + match self { +"# + .to_owned(); + let mut code_country_names = r#"impl Country { + pub fn name(&self) -> &str { + match self { +"# + .to_owned(); languages.iter().for_each(|(c, n)| { - code.push_str(&format!(" /// {}\n ", n)); + let enum_name = c + .split('-') + .map(|c| { + format!( + "{}{}", + c[0..1].to_owned().to_uppercase(), + c[1..].to_owned().to_lowercase() + ) + }) + .collect::(); + // Language enum + code_langs += &format!(" /// {}\n ", n); if c.contains('-') { - code.push_str(&format!("#[serde(rename = \"{}\")]\n ", c)); + code_langs += &format!("#[serde(rename = \"{}\")]\n ", c); } + code_langs += &enum_name; + code_langs += ",\n"; - c.split('-').for_each(|c| { - code.push_str(&format!( - "{}{}", - c[0..1].to_owned().to_uppercase(), - c[1..].to_owned().to_lowercase() - )) - }); - code.push_str(",\n"); + // Language array + code_lang_array += &format!(" Language::{},\n", enum_name); + + // Language names + code_lang_names += &format!(" Language::{} => \"{}\",\n", enum_name, n); }); - - code.push_str("}\n\n"); - - code.push_str("#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]\n"); - code.push_str("#[serde(rename_all = \"SCREAMING_SNAKE_CASE\")]\n"); - code.push_str("pub enum Country {\n"); + code_langs += "}\n"; countries.iter().for_each(|(c, n)| { - code.push_str(&format!(" /// {}\n", n)); - code.push_str(&format!( - " {}{},\n", - c[0..1].to_owned().to_uppercase(), - c[1..].to_owned().to_lowercase() - )) + let enum_name = c[0..1].to_owned().to_uppercase() + &c[1..].to_owned().to_lowercase(); + + // Country enum + code_countries += &format!(" /// {}\n", n); + code_countries += &format!(" {},\n", enum_name); + + // Country array + code_country_array += &format!(" Country::{},\n", enum_name); + + // Country names + code_country_names += &format!(" Country::{} => \"{}\",\n", enum_name, n); }); + code_countries += "}\n"; - code.push_str("}\n"); + code_lang_array += "];\n"; + code_country_array += "];\n"; + code_lang_names += " }\n }\n}\n"; + code_country_names += " }\n }\n}\n"; - code.push_str("// GENERATED SECTION END //"); + let code = format!( + "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}", + code_head, + code_langs, + code_countries, + code_lang_array, + code_country_array, + code_lang_names, + code_country_names, + code_foot, + ); let locale_path = Path::new("src/model/locale.rs"); - let src = std::fs::read_to_string(locale_path).unwrap(); - - let delim_pattern = - Regex::new("// GENERATED SECTION START //\n[^@]*// GENERATED SECTION END //").unwrap(); - - let new_src = delim_pattern.replace(&src, code); - std::fs::write(locale_path, new_src.as_bytes()).unwrap(); + std::fs::write(locale_path, code).unwrap(); } async fn get_locales() -> (BTreeMap, BTreeMap) { diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs new file mode 100644 index 0000000..16ded1f --- /dev/null +++ b/src/codegen/mod.rs @@ -0,0 +1,3 @@ +#![cfg(test)] +mod gen_dictionary; +mod gen_locales; diff --git a/src/dictionary.rs b/src/dictionary.rs index 8291268..4b4b231 100644 --- a/src/dictionary.rs +++ b/src/dictionary.rs @@ -743,19 +743,19 @@ pub(crate) fn get_timeago_tokens(lang: Language) -> phf::Map<&'static str, TaTok ], }, Language::Ja => ::phf::Map { - key: 14108922650502679131, + key: 15467950696543387533, disps: &[ - (1, 5), - (2, 0), + (0, 0), + (5, 0), ], entries: &[ - ("秒前", TaToken { n: 1, unit: Some(TimeUnit::Second) }), - ("年前", TaToken { n: 1, unit: Some(TimeUnit::Year) }), - ("分前", TaToken { n: 1, unit: Some(TimeUnit::Minute) }), - ("時間前", TaToken { n: 1, unit: Some(TimeUnit::Hour) }), - ("か月前", TaToken { n: 1, unit: Some(TimeUnit::Month) }), - ("日前", TaToken { n: 1, unit: Some(TimeUnit::Day) }), - ("週間前", TaToken { n: 1, unit: Some(TimeUnit::Week) }), + ("時", TaToken { n: 1, unit: Some(TimeUnit::Hour) }), + ("週", TaToken { n: 1, unit: Some(TimeUnit::Week) }), + ("月", TaToken { n: 1, unit: Some(TimeUnit::Month) }), + ("日", TaToken { n: 1, unit: Some(TimeUnit::Day) }), + ("秒", TaToken { n: 1, unit: Some(TimeUnit::Second) }), + ("年", TaToken { n: 1, unit: Some(TimeUnit::Year) }), + ("分", TaToken { n: 1, unit: Some(TimeUnit::Minute) }), ], }, Language::Ka => ::phf::Map { @@ -1607,51 +1607,51 @@ pub(crate) fn get_timeago_tokens(lang: Language) -> phf::Map<&'static str, TaTok ], }, Language::ZhCn => ::phf::Map { - key: 14108922650502679131, + key: 2980949210194914378, disps: &[ - (1, 3), - (4, 0), + (0, 0), + (2, 1), ], entries: &[ - ("分钟前", TaToken { n: 1, unit: Some(TimeUnit::Minute) }), - ("年前", TaToken { n: 1, unit: Some(TimeUnit::Year) }), - ("个月前", TaToken { n: 1, unit: Some(TimeUnit::Month) }), - ("小时前", TaToken { n: 1, unit: Some(TimeUnit::Hour) }), - ("秒钟前", TaToken { n: 1, unit: Some(TimeUnit::Second) }), - ("天前", TaToken { n: 1, unit: Some(TimeUnit::Day) }), - ("周前", TaToken { n: 1, unit: Some(TimeUnit::Week) }), + ("分", TaToken { n: 1, unit: Some(TimeUnit::Minute) }), + ("秒", TaToken { n: 1, unit: Some(TimeUnit::Second) }), + ("年", TaToken { n: 1, unit: Some(TimeUnit::Year) }), + ("周", TaToken { n: 1, unit: Some(TimeUnit::Week) }), + ("月", TaToken { n: 1, unit: Some(TimeUnit::Month) }), + ("天", TaToken { n: 1, unit: Some(TimeUnit::Day) }), + ("小", TaToken { n: 1, unit: Some(TimeUnit::Hour) }), ], }, Language::ZhHk => ::phf::Map { - key: 12913932095322966823, + key: 15467950696543387533, disps: &[ - (0, 2), - (0, 0), + (0, 3), + (2, 0), ], entries: &[ - ("年前", TaToken { n: 1, unit: Some(TimeUnit::Year) }), - ("日前", TaToken { n: 1, unit: Some(TimeUnit::Day) }), - ("個月前", TaToken { n: 1, unit: Some(TimeUnit::Month) }), - ("分鐘前", TaToken { n: 1, unit: Some(TimeUnit::Minute) }), - ("星期前", TaToken { n: 1, unit: Some(TimeUnit::Week) }), - ("秒前", TaToken { n: 1, unit: Some(TimeUnit::Second) }), - ("小時前", TaToken { n: 1, unit: Some(TimeUnit::Hour) }), + ("秒", TaToken { n: 1, unit: Some(TimeUnit::Second) }), + ("年", TaToken { n: 1, unit: Some(TimeUnit::Year) }), + ("小", TaToken { n: 1, unit: Some(TimeUnit::Hour) }), + ("分", TaToken { n: 1, unit: Some(TimeUnit::Minute) }), + ("月", TaToken { n: 1, unit: Some(TimeUnit::Month) }), + ("日", TaToken { n: 1, unit: Some(TimeUnit::Day) }), + ("期", TaToken { n: 1, unit: Some(TimeUnit::Week) }), ], }, Language::ZhTw => ::phf::Map { - key: 15467950696543387533, + key: 10121458955350035957, disps: &[ - (2, 1), - (0, 0), + (5, 0), + (6, 5), ], entries: &[ - ("個月前", TaToken { n: 1, unit: Some(TimeUnit::Month) }), - ("天前", TaToken { n: 1, unit: Some(TimeUnit::Day) }), - ("小時前", TaToken { n: 1, unit: Some(TimeUnit::Hour) }), - ("年前", TaToken { n: 1, unit: Some(TimeUnit::Year) }), - ("秒前", TaToken { n: 1, unit: Some(TimeUnit::Second) }), - ("分鐘前", TaToken { n: 1, unit: Some(TimeUnit::Minute) }), - ("週前", TaToken { n: 1, unit: Some(TimeUnit::Week) }), + ("天", TaToken { n: 1, unit: Some(TimeUnit::Day) }), + ("年", TaToken { n: 1, unit: Some(TimeUnit::Year) }), + ("秒", TaToken { n: 1, unit: Some(TimeUnit::Second) }), + ("週", TaToken { n: 1, unit: Some(TimeUnit::Week) }), + ("小", TaToken { n: 1, unit: Some(TimeUnit::Hour) }), + ("月", TaToken { n: 1, unit: Some(TimeUnit::Month) }), + ("分", TaToken { n: 1, unit: Some(TimeUnit::Minute) }), ], }, Language::Zu => ::phf::Map { diff --git a/src/lib.rs b/src/lib.rs index 6ca8e05..0ba2a92 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,11 +3,14 @@ #[macro_use] mod macros; +#[cfg(test)] +mod codegen; + mod cache; mod deobfuscate; +mod dictionary; mod serializer; mod util; -mod dictionary; pub mod client; pub mod download; diff --git a/src/model/locale.rs b/src/model/locale.rs index e5679f8..4ec4adb 100644 --- a/src/model/locale.rs +++ b/src/model/locale.rs @@ -1,10 +1,10 @@ +// This file is automatically generated. DO NOT EDIT. use std::{fmt::Display, str::FromStr}; use serde::{Deserialize, Serialize}; -// GENERATED SECTION START // #[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[serde(rename_all = "kebab-case")] +#[serde(rename_all = "lowercase")] pub enum Language { /// Afrikaans Af, @@ -185,7 +185,7 @@ pub enum Language { } #[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +#[serde(rename_all = "UPPERCASE")] pub enum Country { /// United Arab Emirates Ae, @@ -406,7 +406,410 @@ pub enum Country { /// Zimbabwe Zw, } -// GENERATED SECTION END // + +pub const LANGUAGES: [Language; 83] = [ + Language::Af, + Language::Am, + Language::Ar, + Language::As, + Language::Az, + Language::Be, + Language::Bg, + Language::Bn, + Language::Bs, + Language::Ca, + Language::Cs, + Language::Da, + Language::De, + Language::El, + Language::En, + Language::EnGb, + Language::EnIn, + Language::Es, + Language::Es419, + Language::EsUs, + Language::Et, + Language::Eu, + Language::Fa, + Language::Fi, + Language::Fil, + Language::Fr, + Language::FrCa, + Language::Gl, + Language::Gu, + Language::Hi, + Language::Hr, + Language::Hu, + Language::Hy, + Language::Id, + Language::Is, + Language::It, + Language::Iw, + Language::Ja, + Language::Ka, + Language::Kk, + Language::Km, + Language::Kn, + Language::Ko, + Language::Ky, + Language::Lo, + Language::Lt, + Language::Lv, + Language::Mk, + Language::Ml, + Language::Mn, + Language::Mr, + Language::Ms, + Language::My, + Language::Ne, + Language::Nl, + Language::No, + Language::Or, + Language::Pa, + Language::Pl, + Language::Pt, + Language::PtPt, + Language::Ro, + Language::Ru, + Language::Si, + Language::Sk, + Language::Sl, + Language::Sq, + Language::Sr, + Language::SrLatn, + Language::Sv, + Language::Sw, + Language::Ta, + Language::Te, + Language::Th, + Language::Tr, + Language::Uk, + Language::Ur, + Language::Uz, + Language::Vi, + Language::ZhCn, + Language::ZhHk, + Language::ZhTw, + Language::Zu, +]; + +pub const COUNTRIES: [Country; 109] = [ + Country::Ae, + Country::Ar, + Country::At, + Country::Au, + Country::Az, + Country::Ba, + Country::Bd, + Country::Be, + Country::Bg, + Country::Bh, + Country::Bo, + Country::Br, + Country::By, + Country::Ca, + Country::Ch, + Country::Cl, + Country::Co, + Country::Cr, + Country::Cy, + Country::Cz, + Country::De, + Country::Dk, + Country::Do, + Country::Dz, + Country::Ec, + Country::Ee, + Country::Eg, + Country::Es, + Country::Fi, + Country::Fr, + Country::Gb, + Country::Ge, + Country::Gh, + Country::Gr, + Country::Gt, + Country::Hk, + Country::Hn, + Country::Hr, + Country::Hu, + Country::Id, + Country::Ie, + Country::Il, + Country::In, + Country::Iq, + Country::Is, + Country::It, + Country::Jm, + Country::Jo, + Country::Jp, + Country::Ke, + Country::Kh, + Country::Kr, + Country::Kw, + Country::Kz, + Country::La, + Country::Lb, + Country::Li, + Country::Lk, + Country::Lt, + Country::Lu, + Country::Lv, + Country::Ly, + Country::Ma, + Country::Me, + Country::Mk, + Country::Mt, + Country::Mx, + Country::My, + Country::Ng, + Country::Ni, + Country::Nl, + Country::No, + Country::Np, + Country::Nz, + Country::Om, + Country::Pa, + Country::Pe, + Country::Pg, + Country::Ph, + Country::Pk, + Country::Pl, + Country::Pr, + Country::Pt, + Country::Py, + Country::Qa, + Country::Ro, + Country::Rs, + Country::Ru, + Country::Sa, + Country::Se, + Country::Sg, + Country::Si, + Country::Sk, + Country::Sn, + Country::Sv, + Country::Th, + Country::Tn, + Country::Tr, + Country::Tw, + Country::Tz, + Country::Ua, + Country::Ug, + Country::Us, + Country::Uy, + Country::Ve, + Country::Vn, + Country::Ye, + Country::Za, + Country::Zw, +]; + +impl Language { + pub fn name(&self) -> &str { + match self { + Language::Af => "Afrikaans", + Language::Am => "አማርኛ", + Language::Ar => "العربية", + Language::As => "অসমীয়া", + Language::Az => "Azərbaycan", + Language::Be => "Беларуская", + Language::Bg => "Български", + Language::Bn => "বাংলা", + Language::Bs => "Bosanski", + Language::Ca => "Català", + Language::Cs => "Čeština", + Language::Da => "Dansk", + Language::De => "Deutsch", + Language::El => "Ελληνικά", + Language::En => "English (US)", + Language::EnGb => "English (UK)", + Language::EnIn => "English (India)", + Language::Es => "Español (España)", + Language::Es419 => "Español (Latinoamérica)", + Language::EsUs => "Español (US)", + Language::Et => "Eesti", + Language::Eu => "Euskara", + Language::Fa => "فارسی", + Language::Fi => "Suomi", + Language::Fil => "Filipino", + Language::Fr => "Français", + Language::FrCa => "Français (Canada)", + Language::Gl => "Galego", + Language::Gu => "ગુજરાતી", + Language::Hi => "हिन्दी", + Language::Hr => "Hrvatski", + Language::Hu => "Magyar", + Language::Hy => "Հայերեն", + Language::Id => "Bahasa Indonesia", + Language::Is => "Íslenska", + Language::It => "Italiano", + Language::Iw => "עברית", + Language::Ja => "日本語", + Language::Ka => "ქართული", + Language::Kk => "Қазақ Тілі", + Language::Km => "ខ្មែរ", + Language::Kn => "ಕನ್ನಡ", + Language::Ko => "한국어", + Language::Ky => "Кыргызча", + Language::Lo => "ລາວ", + Language::Lt => "Lietuvių", + Language::Lv => "Latviešu valoda", + Language::Mk => "Македонски", + Language::Ml => "മലയാളം", + Language::Mn => "Монгол", + Language::Mr => "मराठी", + Language::Ms => "Bahasa Malaysia", + Language::My => "ဗမာ", + Language::Ne => "नेपाली", + Language::Nl => "Nederlands", + Language::No => "Norsk", + Language::Or => "ଓଡ଼ିଆ", + Language::Pa => "ਪੰਜਾਬੀ", + Language::Pl => "Polski", + Language::Pt => "Português (Brasil)", + Language::PtPt => "Português", + Language::Ro => "Română", + Language::Ru => "Русский", + Language::Si => "සිංහල", + Language::Sk => "Slovenčina", + Language::Sl => "Slovenščina", + Language::Sq => "Shqip", + Language::Sr => "Српски", + Language::SrLatn => "Srpski", + Language::Sv => "Svenska", + Language::Sw => "Kiswahili", + Language::Ta => "தமிழ்", + Language::Te => "తెలుగు", + Language::Th => "ภาษาไทย", + Language::Tr => "Türkçe", + Language::Uk => "Українська", + Language::Ur => "اردو", + Language::Uz => "O‘zbek", + Language::Vi => "Tiếng Việt", + Language::ZhCn => "中文 (简体)", + Language::ZhHk => "中文 (香港)", + Language::ZhTw => "中文 (繁體)", + Language::Zu => "IsiZulu", + } + } +} + +impl Country { + pub fn name(&self) -> &str { + match self { + Country::Ae => "United Arab Emirates", + Country::Ar => "Argentina", + Country::At => "Austria", + Country::Au => "Australia", + Country::Az => "Azerbaijan", + Country::Ba => "Bosnia and Herzegovina", + Country::Bd => "Bangladesh", + Country::Be => "Belgium", + Country::Bg => "Bulgaria", + Country::Bh => "Bahrain", + Country::Bo => "Bolivia", + Country::Br => "Brazil", + Country::By => "Belarus", + Country::Ca => "Canada", + Country::Ch => "Switzerland", + Country::Cl => "Chile", + Country::Co => "Colombia", + Country::Cr => "Costa Rica", + Country::Cy => "Cyprus", + Country::Cz => "Czechia", + Country::De => "Germany", + Country::Dk => "Denmark", + Country::Do => "Dominican Republic", + Country::Dz => "Algeria", + Country::Ec => "Ecuador", + Country::Ee => "Estonia", + Country::Eg => "Egypt", + Country::Es => "Spain", + Country::Fi => "Finland", + Country::Fr => "France", + Country::Gb => "United Kingdom", + Country::Ge => "Georgia", + Country::Gh => "Ghana", + Country::Gr => "Greece", + Country::Gt => "Guatemala", + Country::Hk => "Hong Kong", + Country::Hn => "Honduras", + Country::Hr => "Croatia", + Country::Hu => "Hungary", + Country::Id => "Indonesia", + Country::Ie => "Ireland", + Country::Il => "Israel", + Country::In => "India", + Country::Iq => "Iraq", + Country::Is => "Iceland", + Country::It => "Italy", + Country::Jm => "Jamaica", + Country::Jo => "Jordan", + Country::Jp => "Japan", + Country::Ke => "Kenya", + Country::Kh => "Cambodia", + Country::Kr => "South Korea", + Country::Kw => "Kuwait", + Country::Kz => "Kazakhstan", + Country::La => "Laos", + Country::Lb => "Lebanon", + Country::Li => "Liechtenstein", + Country::Lk => "Sri Lanka", + Country::Lt => "Lithuania", + Country::Lu => "Luxembourg", + Country::Lv => "Latvia", + Country::Ly => "Libya", + Country::Ma => "Morocco", + Country::Me => "Montenegro", + Country::Mk => "North Macedonia", + Country::Mt => "Malta", + Country::Mx => "Mexico", + Country::My => "Malaysia", + Country::Ng => "Nigeria", + Country::Ni => "Nicaragua", + Country::Nl => "Netherlands", + Country::No => "Norway", + Country::Np => "Nepal", + Country::Nz => "New Zealand", + Country::Om => "Oman", + Country::Pa => "Panama", + Country::Pe => "Peru", + Country::Pg => "Papua New Guinea", + Country::Ph => "Philippines", + Country::Pk => "Pakistan", + Country::Pl => "Poland", + Country::Pr => "Puerto Rico", + Country::Pt => "Portugal", + Country::Py => "Paraguay", + Country::Qa => "Qatar", + Country::Ro => "Romania", + Country::Rs => "Serbia", + Country::Ru => "Russia", + Country::Sa => "Saudi Arabia", + Country::Se => "Sweden", + Country::Sg => "Singapore", + Country::Si => "Slovenia", + Country::Sk => "Slovakia", + Country::Sn => "Senegal", + Country::Sv => "El Salvador", + Country::Th => "Thailand", + Country::Tn => "Tunisia", + Country::Tr => "Turkey", + Country::Tw => "Taiwan", + Country::Tz => "Tanzania", + Country::Ua => "Ukraine", + Country::Ug => "Uganda", + Country::Us => "United States", + Country::Uy => "Uruguay", + Country::Ve => "Venezuela", + Country::Vn => "Vietnam", + Country::Ye => "Yemen", + Country::Za => "South Africa", + Country::Zw => "Zimbabwe", + } + } +} impl Display for Language { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -426,7 +829,6 @@ impl Display for Country { impl FromStr for Language { type Err = serde_json::Error; - fn from_str(s: &str) -> Result { serde_json::from_str(&format!("\"{}\"", s)) } @@ -434,7 +836,6 @@ impl FromStr for Language { impl FromStr for Country { type Err = serde_json::Error; - fn from_str(s: &str) -> Result { serde_json::from_str(&format!("\"{}\"", s)) } diff --git a/src/timeago.rs b/src/timeago.rs index ea8999f..bf7edc6 100644 --- a/src/timeago.rs +++ b/src/timeago.rs @@ -4,92 +4,6 @@ use serde::{Deserialize, Serialize}; use crate::{dictionary, model::Language, util}; -pub const LANGUAGES: [Language; 83] = [ - Language::Af, - Language::Am, - Language::Ar, - Language::As, - Language::Az, - Language::Be, - Language::Bg, - Language::Bn, - Language::Bs, - Language::Ca, - Language::Cs, - Language::Da, - Language::De, - Language::El, - Language::En, - Language::EnGb, - Language::EnIn, - Language::Es, - Language::Es419, - Language::EsUs, - Language::Et, - Language::Eu, - Language::Fa, - Language::Fi, - Language::Fil, - Language::Fr, - Language::FrCa, - Language::Gl, - Language::Gu, - Language::Hi, - Language::Hr, - Language::Hu, - Language::Hy, - Language::Id, - Language::Is, - Language::It, - Language::Iw, - Language::Ja, - Language::Ka, - Language::Kk, - Language::Km, - Language::Kn, - Language::Ko, - Language::Ky, - Language::Lo, - Language::Lt, - Language::Lv, - Language::Mk, - Language::Ml, - Language::Mn, - Language::Mr, - Language::Ms, - Language::My, - Language::Ne, - Language::Nl, - Language::No, - Language::Or, - Language::Pa, - Language::Pl, - Language::Pt, - Language::PtPt, - Language::Ro, - Language::Ru, - Language::Si, - Language::Sk, - Language::Sl, - Language::Sq, - Language::Sr, - Language::SrLatn, - Language::Sv, - Language::Sw, - Language::Ta, - Language::Te, - Language::Th, - Language::Tr, - Language::Uk, - Language::Ur, - Language::Uz, - Language::Vi, - Language::ZhCn, - Language::ZhHk, - Language::ZhTw, - Language::Zu, -]; - #[derive(Debug, Copy, Clone, Serialize, Deserialize, Eq)] pub struct TimeAgo { pub n: u8, @@ -162,18 +76,35 @@ pub fn parse(lang: Language, textual_date: &str) -> Option { .collect::(); let mut qu: u8 = util::parse_numeric(&textual_date).unwrap_or(1); - filtered_str.split(' ').find_map(|word| { - mappings - .get(word) - .map(|t| match t.unit { - Some(unit) => Some(TimeAgo { n: t.n * qu, unit }), - None => { - qu = t.n; - None - } + + match lang { + Language::Ja | Language::ZhCn | Language::ZhHk | Language::ZhTw => { + filtered_str.chars().find_map(|word| { + mappings + .get(&word.to_string()) + .map(|t| match t.unit { + Some(unit) => Some(TimeAgo { n: t.n * qu, unit }), + None => { + qu = t.n; + None + } + }) + .flatten() }) - .flatten() - }) + } + _ => filtered_str.split(' ').find_map(|word| { + mappings + .get(word) + .map(|t| match t.unit { + Some(unit) => Some(TimeAgo { n: t.n * qu, unit }), + None => { + qu = t.n; + None + } + }) + .flatten() + }), + } } #[cfg(test)] diff --git a/testfiles/date/dictionary.json b/testfiles/date/dictionary.json index 7fc506e..cfbcfc0 100644 --- a/testfiles/date/dictionary.json +++ b/testfiles/date/dictionary.json @@ -548,14 +548,15 @@ } }, "ja": { + "by_char": true, "timeago_tokens": { - "か月前": "M", - "分前": "m", - "年前": "Y", - "日前": "D", - "時間前": "h", - "秒前": "s", - "週間前": "W" + "月": "M", + "分": "m", + "年": "Y", + "日": "D", + "時": "h", + "秒": "s", + "週": "W" } }, "ka": { @@ -1180,36 +1181,39 @@ } }, "zh-CN": { + "by_char": true, "timeago_tokens": { - "个月前": "M", - "分钟前": "m", - "周前": "W", - "天前": "D", - "小时前": "h", - "年前": "Y", - "秒钟前": "s" + "月": "M", + "分": "m", + "周": "W", + "天": "D", + "小": "h", + "年": "Y", + "秒": "s" } }, "zh-HK": { + "by_char": true, "timeago_tokens": { - "個月前": "M", - "分鐘前": "m", - "小時前": "h", - "年前": "Y", - "日前": "D", - "星期前": "W", - "秒前": "s" + "月": "M", + "分": "m", + "小": "h", + "年": "Y", + "日": "D", + "期": "W", + "秒": "s" } }, "zh-TW": { + "by_char": true, "timeago_tokens": { - "個月前": "M", - "分鐘前": "m", - "天前": "D", - "小時前": "h", - "年前": "Y", - "秒前": "s", - "週前": "W" + "月": "M", + "分": "m", + "天": "D", + "小": "h", + "年": "Y", + "秒": "s", + "週": "W" } }, "zu": {