fix: improve language docs + string parsing

This commit is contained in:
ThetaDev 2023-05-11 17:00:01 +02:00
parent 86775ea95b
commit e184341625
6 changed files with 255 additions and 92 deletions

View file

@ -1,5 +1,7 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fmt::Write; use std::fmt::Write;
use std::fs::File;
use std::io::BufReader;
use path_macro::path; use path_macro::path;
use reqwest::header; use reqwest::header;
@ -9,6 +11,7 @@ use serde_with::serde_as;
use serde_with::VecSkipError; use serde_with::VecSkipError;
use crate::model::Text; use crate::model::Text;
use crate::util::DICT_DIR;
use crate::util::SRC_DIR; use crate::util::SRC_DIR;
#[serde_as] #[serde_as]
@ -141,16 +144,41 @@ struct LanguageCountryCommand {
pub async fn generate_locales() { pub async fn generate_locales() {
let (languages, countries) = get_locales().await; let (languages, countries) = get_locales().await;
let json_path = path!(*DICT_DIR / "lang_names.json");
let json_file = File::open(json_path).unwrap();
let lang_names: BTreeMap<String, String> =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let code_head = r#"// This file is automatically generated. DO NOT EDIT. let code_head = r#"// This file is automatically generated. DO NOT EDIT.
//! Languages and countries //! Languages and countries
use std::str::FromStr;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::error::Error; use crate::error::Error;
"#; "#;
let code_foot = r#"serde_plain::derive_fromstr_from_deserialize!(Language, Error); let code_foot = r#"impl FromStr for Language {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut sub = s;
loop {
if let Ok(v) = serde_plain::from_str(sub) {
return Ok(v);
}
match sub.rfind('-') {
Some(pos) => {
sub = &sub[..pos];
}
None => return Err(Error::Other("could not parse language `{s}`".into())),
}
}
}
}
serde_plain::derive_display_from_serialize!(Language); serde_plain::derive_display_from_serialize!(Language);
serde_plain::derive_fromstr_from_deserialize!(Country, Error); serde_plain::derive_fromstr_from_deserialize!(Country, Error);
@ -199,8 +227,8 @@ pub enum Country {
"# "#
.to_owned(); .to_owned();
languages.iter().for_each(|(c, n)| { languages.iter().for_each(|(code, native_name)| {
let enum_name = c let enum_name = code
.split('-') .split('-')
.map(|c| { .map(|c| {
format!( format!(
@ -211,10 +239,16 @@ pub enum Country {
}) })
.collect::<String>(); .collect::<String>();
let en_name = lang_names.get(code).expect(code);
// Language enum // Language enum
write!(code_langs, " /// {n}\n ").unwrap(); if en_name == native_name || code.starts_with("en") {
if c.contains('-') { write!(code_langs, " /// {native_name}\n ").unwrap();
write!(code_langs, "#[serde(rename = \"{c}\")]\n ").unwrap(); } else {
write!(code_langs, " /// {en_name} / {native_name}\n ").unwrap();
}
if code.contains('-') {
write!(code_langs, "#[serde(rename = \"{code}\")]\n ").unwrap();
} }
code_langs += &enum_name; code_langs += &enum_name;
code_langs += ",\n"; code_langs += ",\n";
@ -225,7 +259,7 @@ pub enum Country {
// Language names // Language names
writeln!( writeln!(
code_lang_names, code_lang_names,
" Language::{enum_name} => \"{n}\"," " Language::{enum_name} => \"{native_name}\","
) )
.unwrap(); .unwrap();
}); });

View file

@ -2,6 +2,8 @@
//! Languages and countries //! Languages and countries
use std::str::FromStr;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::error::Error; use crate::error::Error;
@ -13,31 +15,31 @@ use crate::error::Error;
pub enum Language { pub enum Language {
/// Afrikaans /// Afrikaans
Af, Af,
/// አማርኛ /// Amharic / አማርኛ
Am, Am,
/// العربية /// Arabic / العربية
Ar, Ar,
/// অসমীয়া /// Assamese / অসমীয়া
As, As,
/// Azərbaycan /// Azerbaijani / Azərbaycan
Az, Az,
/// Беларуская /// Belarusian / Беларуская
Be, Be,
/// Български /// Bulgarian / Български
Bg, Bg,
/// বাংলা /// Bangla / বাংলা
Bn, Bn,
/// Bosanski /// Bosnian / Bosanski
Bs, Bs,
/// Català /// Catalan / Català
Ca, Ca,
/// Čeština /// Czech / Čeština
Cs, Cs,
/// Dansk /// Danish / Dansk
Da, Da,
/// Deutsch /// German / Deutsch
De, De,
/// Ελληνικά /// Greek / Ελληνικά
El, El,
/// English (US) /// English (US)
En, En,
@ -47,145 +49,145 @@ pub enum Language {
/// English (India) /// English (India)
#[serde(rename = "en-IN")] #[serde(rename = "en-IN")]
EnIn, EnIn,
/// Español (España) /// Spanish / Español (España)
Es, Es,
/// Español (Latinoamérica) /// Latin American Spanish / Español (Latinoamérica)
#[serde(rename = "es-419")] #[serde(rename = "es-419")]
Es419, Es419,
/// Español (US) /// Spanish (United States) / Español (US)
#[serde(rename = "es-US")] #[serde(rename = "es-US")]
EsUs, EsUs,
/// Eesti /// Estonian / Eesti
Et, Et,
/// Euskara /// Basque / Euskara
Eu, Eu,
/// فارسی /// Persian / فارسی
Fa, Fa,
/// Suomi /// Finnish / Suomi
Fi, Fi,
/// Filipino /// Filipino
Fil, Fil,
/// Français /// French / Français
Fr, Fr,
/// Français (Canada) /// Canadian French / Français (Canada)
#[serde(rename = "fr-CA")] #[serde(rename = "fr-CA")]
FrCa, FrCa,
/// Galego /// Galician / Galego
Gl, Gl,
/// ગુજરાતી /// Gujarati / ગુજરાતી
Gu, Gu,
/// हिन्दी /// Hindi / हिन्दी
Hi, Hi,
/// Hrvatski /// Croatian / Hrvatski
Hr, Hr,
/// Magyar /// Hungarian / Magyar
Hu, Hu,
/// Հայերեն /// Armenian / Հայերեն
Hy, Hy,
/// Bahasa Indonesia /// Indonesian / Bahasa Indonesia
Id, Id,
/// Íslenska /// Icelandic / Íslenska
Is, Is,
/// Italiano /// Italian / Italiano
It, It,
/// עברית /// Hebrew / עברית
Iw, Iw,
/// 日本語 /// Japanese / 日本語
Ja, Ja,
/// ქართული /// Georgian / ქართული
Ka, Ka,
/// Қазақ Тілі /// Kazakh / Қазақ Тілі
Kk, Kk,
/// ខ្មែរ /// Khmer / ខ្មែរ
Km, Km,
/// ಕನ್ನಡ /// Kannada / ಕನ್ನಡ
Kn, Kn,
/// 한국어 /// Korean / 한국어
Ko, Ko,
/// Кыргызча /// Kyrgyz / Кыргызча
Ky, Ky,
/// ລາວ /// Lao / ລາວ
Lo, Lo,
/// Lietuvių /// Lithuanian / Lietuvių
Lt, Lt,
/// Latviešu valoda /// Latvian / Latviešu valoda
Lv, Lv,
/// Македонски /// Macedonian / Македонски
Mk, Mk,
/// മലയാളം /// Malayalam / മലയാളം
Ml, Ml,
/// Монгол /// Mongolian / Монгол
Mn, Mn,
/// मराठी /// Marathi / मराठी
Mr, Mr,
/// Bahasa Malaysia /// Malay / Bahasa Malaysia
Ms, Ms,
/// ဗမာ /// Burmese / ဗမာ
My, My,
/// नेपाली /// Nepali / नेपाली
Ne, Ne,
/// Nederlands /// Dutch / Nederlands
Nl, Nl,
/// Norsk /// Norwegian / Norsk
No, No,
/// ଓଡ଼ିଆ /// Odia / ଓଡ଼ିଆ
Or, Or,
/// ਪੰਜਾਬੀ /// Punjabi / ਪੰਜਾਬੀ
Pa, Pa,
/// Polski /// Polish / Polski
Pl, Pl,
/// Português (Brasil) /// Portuguese / Português (Brasil)
Pt, Pt,
/// Português /// European Portuguese / Português
#[serde(rename = "pt-PT")] #[serde(rename = "pt-PT")]
PtPt, PtPt,
/// Română /// Romanian / Română
Ro, Ro,
/// Русский /// Russian / Русский
Ru, Ru,
/// සිංහල /// Sinhala / සිංහල
Si, Si,
/// Slovenčina /// Slovak / Slovenčina
Sk, Sk,
/// Slovenščina /// Slovenian / Slovenščina
Sl, Sl,
/// Shqip /// Albanian / Shqip
Sq, Sq,
/// Српски /// Serbian / Српски
Sr, Sr,
/// Srpski /// Serbian (Latin) / Srpski
#[serde(rename = "sr-Latn")] #[serde(rename = "sr-Latn")]
SrLatn, SrLatn,
/// Svenska /// Swedish / Svenska
Sv, Sv,
/// Kiswahili /// Swahili / Kiswahili
Sw, Sw,
/// தமிழ் /// Tamil / தமிழ்
Ta, Ta,
/// తెలుగు /// Telugu / తెలుగు
Te, Te,
/// ภาษาไทย /// Thai / ภาษาไทย
Th, Th,
/// Türkçe /// Turkish / Türkçe
Tr, Tr,
/// Українська /// Ukrainian / Українська
Uk, Uk,
/// اردو /// Urdu / اردو
Ur, Ur,
/// Ozbek /// Uzbek / Ozbek
Uz, Uz,
/// Tiếng Việt /// Vietnamese / Tiếng Việt
Vi, Vi,
/// 中文 (简体) /// Chinese (China) / 中文 (简体)
#[serde(rename = "zh-CN")] #[serde(rename = "zh-CN")]
ZhCn, ZhCn,
/// 中文 (香港) /// Chinese (Hong Kong) / 中文 (香港)
#[serde(rename = "zh-HK")] #[serde(rename = "zh-HK")]
ZhHk, ZhHk,
/// 中文 (繁體) /// Chinese (Taiwan) / 中文 (繁體)
#[serde(rename = "zh-TW")] #[serde(rename = "zh-TW")]
ZhTw, ZhTw,
/// IsiZulu /// Zulu / IsiZulu
Zu, Zu,
} }
@ -829,7 +831,25 @@ impl Country {
} }
} }
serde_plain::derive_fromstr_from_deserialize!(Language, Error); impl FromStr for Language {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut sub = s;
loop {
if let Ok(v) = serde_plain::from_str(sub) {
return Ok(v);
}
match sub.rfind('-') {
Some(pos) => {
sub = &sub[..pos];
}
None => return Err(Error::Other("could not parse language `{s}`".into())),
}
}
}
}
serde_plain::derive_display_from_serialize!(Language); serde_plain::derive_display_from_serialize!(Language);
serde_plain::derive_fromstr_from_deserialize!(Country, Error); serde_plain::derive_fromstr_from_deserialize!(Country, Error);

View file

@ -618,4 +618,16 @@ pub(crate) mod tests {
let res_str = res.join(" "); let res_str = res.join(" ");
assert_eq!(res_str, teststr) assert_eq!(res_str, teststr)
} }
#[rstest]
#[case("en", Some(Language::En))]
#[case("en-GB", Some(Language::EnGb))]
#[case("en-US", Some(Language::En))]
#[case("en-ZZ", Some(Language::En))]
#[case("xy", None)]
#[case("xy-ZZ", None)]
fn parse_language(#[case] s: &str, #[case] expect: Option<Language>) {
let res = Language::from_str(s).ok();
assert_eq!(res, expect);
}
} }

View file

@ -0,0 +1,16 @@
// Builds lang_names.json: a map from every language code found in dictionary.json
// to its English display name.
const fs = require("fs");

const dictionary = JSON.parse(fs.readFileSync("dictionary.json"));
const englishNames = new Intl.DisplayNames(["en"], { type: "language" });

// Start with the dictionary's own keys, then add the codes each entry lists as `equivalent`.
const codes = Object.keys(dictionary);
for (const entry of Object.values(dictionary)) {
  if (entry.equivalent) {
    codes.push(...entry.equivalent);
  }
}
codes.sort();

// Pair each code with its English name; the sorted order carries over to the output file.
const pairs = codes.map((code) => [code, englishNames.of(code)]);
const names = Object.fromEntries(pairs);

fs.writeFileSync("lang_names.json", JSON.stringify(names, null, 2));

View file

@ -0,0 +1,85 @@
{
"af": "Afrikaans",
"am": "Amharic",
"ar": "Arabic",
"as": "Assamese",
"az": "Azerbaijani",
"be": "Belarusian",
"bg": "Bulgarian",
"bn": "Bangla",
"bs": "Bosnian",
"ca": "Catalan",
"cs": "Czech",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"en-GB": "British English",
"en-IN": "English (India)",
"es": "Spanish",
"es-419": "Latin American Spanish",
"es-US": "Spanish (United States)",
"et": "Estonian",
"eu": "Basque",
"fa": "Persian",
"fi": "Finnish",
"fil": "Filipino",
"fr": "French",
"fr-CA": "Canadian French",
"gl": "Galician",
"gu": "Gujarati",
"hi": "Hindi",
"hr": "Croatian",
"hu": "Hungarian",
"hy": "Armenian",
"id": "Indonesian",
"is": "Icelandic",
"it": "Italian",
"iw": "Hebrew",
"ja": "Japanese",
"ka": "Georgian",
"kk": "Kazakh",
"km": "Khmer",
"kn": "Kannada",
"ko": "Korean",
"ky": "Kyrgyz",
"lo": "Lao",
"lt": "Lithuanian",
"lv": "Latvian",
"mk": "Macedonian",
"ml": "Malayalam",
"mn": "Mongolian",
"mr": "Marathi",
"ms": "Malay",
"my": "Burmese",
"ne": "Nepali",
"nl": "Dutch",
"no": "Norwegian",
"or": "Odia",
"pa": "Punjabi",
"pl": "Polish",
"pt": "Portuguese",
"pt-PT": "European Portuguese",
"ro": "Romanian",
"ru": "Russian",
"si": "Sinhala",
"sk": "Slovak",
"sl": "Slovenian",
"sq": "Albanian",
"sr": "Serbian",
"sr-Latn": "Serbian (Latin)",
"sv": "Swedish",
"sw": "Swahili",
"ta": "Tamil",
"te": "Telugu",
"th": "Thai",
"tr": "Turkish",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",
"vi": "Vietnamese",
"zh-CN": "Chinese (China)",
"zh-HK": "Chinese (Hong Kong)",
"zh-TW": "Chinese (Taiwan)",
"zu": "Zulu"
}

View file

@ -1172,12 +1172,8 @@ fn search_suggestion(rp: RustyPipe) {
#[rstest] #[rstest]
fn search_suggestion_empty(rp: RustyPipe) { fn search_suggestion_empty(rp: RustyPipe) {
let result = tokio_test::block_on( let result =
rp.query() tokio_test::block_on(rp.query().search_suggestion("fjew327p4ifjelwfvnewg49")).unwrap();
.lang(Language::Th)
.search_suggestion("fjew327p4ifjelwfvnewg49"),
)
.unwrap();
assert!(result.is_empty()); assert!(result.is_empty());
} }