fix: improve language docs + string parsing

This commit is contained in:
ThetaDev 2023-05-11 17:00:01 +02:00
parent 86775ea95b
commit e184341625
6 changed files with 255 additions and 92 deletions

View file

@ -1,5 +1,7 @@
use std::collections::BTreeMap;
use std::fmt::Write;
use std::fs::File;
use std::io::BufReader;
use path_macro::path;
use reqwest::header;
@ -9,6 +11,7 @@ use serde_with::serde_as;
use serde_with::VecSkipError;
use crate::model::Text;
use crate::util::DICT_DIR;
use crate::util::SRC_DIR;
#[serde_as]
@ -141,16 +144,41 @@ struct LanguageCountryCommand {
pub async fn generate_locales() {
let (languages, countries) = get_locales().await;
let json_path = path!(*DICT_DIR / "lang_names.json");
let json_file = File::open(json_path).unwrap();
let lang_names: BTreeMap<String, String> =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
//! Languages and countries
use std::str::FromStr;
use serde::{Deserialize, Serialize};
use crate::error::Error;
"#;
let code_foot = r#"serde_plain::derive_fromstr_from_deserialize!(Language, Error);
// Footer of the generated file. `FromStr` for `Language` is emitted as a
// hand-written impl so that a code with an unknown suffix (e.g. `en-ZZ`)
// falls back to the longest prefix that parses (`en`).
// The error message must go through `format!`; a plain string literal would
// emit the text `{s}` verbatim instead of the rejected input.
let code_foot = r#"impl FromStr for Language {
    type Err = Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mut sub = s;
        loop {
            if let Ok(v) = serde_plain::from_str(sub) {
                return Ok(v);
            }
            // Drop the last `-` separated subtag and retry with the shorter prefix.
            match sub.rsplit_once('-') {
                Some((prefix, _)) => sub = prefix,
                None => return Err(Error::Other(format!("could not parse language `{s}`").into())),
            }
        }
    }
}
serde_plain::derive_display_from_serialize!(Language);
serde_plain::derive_fromstr_from_deserialize!(Country, Error);
@ -199,8 +227,8 @@ pub enum Country {
"#
.to_owned();
languages.iter().for_each(|(c, n)| {
let enum_name = c
languages.iter().for_each(|(code, native_name)| {
let enum_name = code
.split('-')
.map(|c| {
format!(
@ -211,10 +239,16 @@ pub enum Country {
})
.collect::<String>();
let en_name = lang_names.get(code).expect(code);
// Language enum
write!(code_langs, " /// {n}\n ").unwrap();
if c.contains('-') {
write!(code_langs, "#[serde(rename = \"{c}\")]\n ").unwrap();
if en_name == native_name || code.starts_with("en") {
write!(code_langs, " /// {native_name}\n ").unwrap();
} else {
write!(code_langs, " /// {en_name} / {native_name}\n ").unwrap();
}
if code.contains('-') {
write!(code_langs, "#[serde(rename = \"{code}\")]\n ").unwrap();
}
code_langs += &enum_name;
code_langs += ",\n";
@ -225,7 +259,7 @@ pub enum Country {
// Language names
writeln!(
code_lang_names,
" Language::{enum_name} => \"{n}\","
" Language::{enum_name} => \"{native_name}\","
)
.unwrap();
});

View file

@ -2,6 +2,8 @@
//! Languages and countries
use std::str::FromStr;
use serde::{Deserialize, Serialize};
use crate::error::Error;
@ -13,31 +15,31 @@ use crate::error::Error;
pub enum Language {
/// Afrikaans
Af,
/// አማርኛ
/// Amharic / አማርኛ
Am,
/// العربية
/// Arabic / العربية
Ar,
/// অসমীয়া
/// Assamese / অসমীয়া
As,
/// Azərbaycan
/// Azerbaijani / Azərbaycan
Az,
/// Беларуская
/// Belarusian / Беларуская
Be,
/// Български
/// Bulgarian / Български
Bg,
/// বাংলা
/// Bangla / বাংলা
Bn,
/// Bosanski
/// Bosnian / Bosanski
Bs,
/// Català
/// Catalan / Català
Ca,
/// Čeština
/// Czech / Čeština
Cs,
/// Dansk
/// Danish / Dansk
Da,
/// Deutsch
/// German / Deutsch
De,
/// Ελληνικά
/// Greek / Ελληνικά
El,
/// English (US)
En,
@ -47,145 +49,145 @@ pub enum Language {
/// English (India)
#[serde(rename = "en-IN")]
EnIn,
/// Español (España)
/// Spanish / Español (España)
Es,
/// Español (Latinoamérica)
/// Latin American Spanish / Español (Latinoamérica)
#[serde(rename = "es-419")]
Es419,
/// Español (US)
/// Spanish (United States) / Español (US)
#[serde(rename = "es-US")]
EsUs,
/// Eesti
/// Estonian / Eesti
Et,
/// Euskara
/// Basque / Euskara
Eu,
/// فارسی
/// Persian / فارسی
Fa,
/// Suomi
/// Finnish / Suomi
Fi,
/// Filipino
Fil,
/// Français
/// French / Français
Fr,
/// Français (Canada)
/// Canadian French / Français (Canada)
#[serde(rename = "fr-CA")]
FrCa,
/// Galego
/// Galician / Galego
Gl,
/// ગુજરાતી
/// Gujarati / ગુજરાતી
Gu,
/// हिन्दी
/// Hindi / हिन्दी
Hi,
/// Hrvatski
/// Croatian / Hrvatski
Hr,
/// Magyar
/// Hungarian / Magyar
Hu,
/// Հայերեն
/// Armenian / Հայերեն
Hy,
/// Bahasa Indonesia
/// Indonesian / Bahasa Indonesia
Id,
/// Íslenska
/// Icelandic / Íslenska
Is,
/// Italiano
/// Italian / Italiano
It,
/// עברית
/// Hebrew / עברית
Iw,
/// 日本語
/// Japanese / 日本語
Ja,
/// ქართული
/// Georgian / ქართული
Ka,
/// Қазақ Тілі
/// Kazakh / Қазақ Тілі
Kk,
/// ខ្មែរ
/// Khmer / ខ្មែរ
Km,
/// ಕನ್ನಡ
/// Kannada / ಕನ್ನಡ
Kn,
/// 한국어
/// Korean / 한국어
Ko,
/// Кыргызча
/// Kyrgyz / Кыргызча
Ky,
/// ລາວ
/// Lao / ລາວ
Lo,
/// Lietuvių
/// Lithuanian / Lietuvių
Lt,
/// Latviešu valoda
/// Latvian / Latviešu valoda
Lv,
/// Македонски
/// Macedonian / Македонски
Mk,
/// മലയാളം
/// Malayalam / മലയാളം
Ml,
/// Монгол
/// Mongolian / Монгол
Mn,
/// मराठी
/// Marathi / मराठी
Mr,
/// Bahasa Malaysia
/// Malay / Bahasa Malaysia
Ms,
/// ဗမာ
/// Burmese / ဗမာ
My,
/// नेपाली
/// Nepali / नेपाली
Ne,
/// Nederlands
/// Dutch / Nederlands
Nl,
/// Norsk
/// Norwegian / Norsk
No,
/// ଓଡ଼ିଆ
/// Odia / ଓଡ଼ିଆ
Or,
/// ਪੰਜਾਬੀ
/// Punjabi / ਪੰਜਾਬੀ
Pa,
/// Polski
/// Polish / Polski
Pl,
/// Português (Brasil)
/// Portuguese / Português (Brasil)
Pt,
/// Português
/// European Portuguese / Português
#[serde(rename = "pt-PT")]
PtPt,
/// Română
/// Romanian / Română
Ro,
/// Русский
/// Russian / Русский
Ru,
/// සිංහල
/// Sinhala / සිංහල
Si,
/// Slovenčina
/// Slovak / Slovenčina
Sk,
/// Slovenščina
/// Slovenian / Slovenščina
Sl,
/// Shqip
/// Albanian / Shqip
Sq,
/// Српски
/// Serbian / Српски
Sr,
/// Srpski
/// Serbian (Latin) / Srpski
#[serde(rename = "sr-Latn")]
SrLatn,
/// Svenska
/// Swedish / Svenska
Sv,
/// Kiswahili
/// Swahili / Kiswahili
Sw,
/// தமிழ்
/// Tamil / தமிழ்
Ta,
/// తెలుగు
/// Telugu / తెలుగు
Te,
/// ภาษาไทย
/// Thai / ภาษาไทย
Th,
/// Türkçe
/// Turkish / Türkçe
Tr,
/// Українська
/// Ukrainian / Українська
Uk,
/// اردو
/// Urdu / اردو
Ur,
/// Ozbek
/// Uzbek / Ozbek
Uz,
/// Tiếng Việt
/// Vietnamese / Tiếng Việt
Vi,
/// 中文 (简体)
/// Chinese (China) / 中文 (简体)
#[serde(rename = "zh-CN")]
ZhCn,
/// 中文 (香港)
/// Chinese (Hong Kong) / 中文 (香港)
#[serde(rename = "zh-HK")]
ZhHk,
/// 中文 (繁體)
/// Chinese (Taiwan) / 中文 (繁體)
#[serde(rename = "zh-TW")]
ZhTw,
/// IsiZulu
/// Zulu / IsiZulu
Zu,
}
@ -829,7 +831,25 @@ impl Country {
}
}
serde_plain::derive_fromstr_from_deserialize!(Language, Error);
impl FromStr for Language {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut sub = s;
loop {
if let Ok(v) = serde_plain::from_str(sub) {
return Ok(v);
}
match sub.rfind('-') {
Some(pos) => {
sub = &sub[..pos];
}
None => return Err(Error::Other("could not parse language `{s}`".into())),
}
}
}
}
serde_plain::derive_display_from_serialize!(Language);
serde_plain::derive_fromstr_from_deserialize!(Country, Error);

View file

@ -618,4 +618,16 @@ pub(crate) mod tests {
let res_str = res.join(" ");
assert_eq!(res_str, teststr)
}
#[rstest]
#[case("en", Some(Language::En))]
#[case("en-GB", Some(Language::EnGb))]
#[case("en-US", Some(Language::En))]
#[case("en-ZZ", Some(Language::En))]
#[case("xy", None)]
#[case("xy-ZZ", None)]
fn parse_language(#[case] s: &str, #[case] expect: Option<Language>) {
let res = Language::from_str(s).ok();
assert_eq!(res, expect);
}
}

View file

@ -0,0 +1,16 @@
// Builds lang_names.json: a map from language tag to its English display name.
//
// Tags are taken from the keys of dictionary.json and from every entry's
// optional `equivalent` list. Both files are resolved relative to the current
// working directory.
const fs = require("fs");

const dictionary = JSON.parse(fs.readFileSync("dictionary.json"));
const englishNames = new Intl.DisplayNames(["en"], { type: "language" });

// Gather every tag: each dictionary key followed by its listed equivalents.
const tags = [];
for (const [tag, entry] of Object.entries(dictionary)) {
  tags.push(tag);
  if (entry.equivalent) {
    tags.push(...entry.equivalent);
  }
}
tags.sort();

// Tags are sorted before the object is built; repeated tags collapse into one key.
const namePairs = tags.map((tag) => [tag, englishNames.of(tag)]);
const names = Object.fromEntries(namePairs);

fs.writeFileSync("lang_names.json", JSON.stringify(names, null, 2));

View file

@ -0,0 +1,85 @@
{
"af": "Afrikaans",
"am": "Amharic",
"ar": "Arabic",
"as": "Assamese",
"az": "Azerbaijani",
"be": "Belarusian",
"bg": "Bulgarian",
"bn": "Bangla",
"bs": "Bosnian",
"ca": "Catalan",
"cs": "Czech",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"en-GB": "British English",
"en-IN": "English (India)",
"es": "Spanish",
"es-419": "Latin American Spanish",
"es-US": "Spanish (United States)",
"et": "Estonian",
"eu": "Basque",
"fa": "Persian",
"fi": "Finnish",
"fil": "Filipino",
"fr": "French",
"fr-CA": "Canadian French",
"gl": "Galician",
"gu": "Gujarati",
"hi": "Hindi",
"hr": "Croatian",
"hu": "Hungarian",
"hy": "Armenian",
"id": "Indonesian",
"is": "Icelandic",
"it": "Italian",
"iw": "Hebrew",
"ja": "Japanese",
"ka": "Georgian",
"kk": "Kazakh",
"km": "Khmer",
"kn": "Kannada",
"ko": "Korean",
"ky": "Kyrgyz",
"lo": "Lao",
"lt": "Lithuanian",
"lv": "Latvian",
"mk": "Macedonian",
"ml": "Malayalam",
"mn": "Mongolian",
"mr": "Marathi",
"ms": "Malay",
"my": "Burmese",
"ne": "Nepali",
"nl": "Dutch",
"no": "Norwegian",
"or": "Odia",
"pa": "Punjabi",
"pl": "Polish",
"pt": "Portuguese",
"pt-PT": "European Portuguese",
"ro": "Romanian",
"ru": "Russian",
"si": "Sinhala",
"sk": "Slovak",
"sl": "Slovenian",
"sq": "Albanian",
"sr": "Serbian",
"sr-Latn": "Serbian (Latin)",
"sv": "Swedish",
"sw": "Swahili",
"ta": "Tamil",
"te": "Telugu",
"th": "Thai",
"tr": "Turkish",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",
"vi": "Vietnamese",
"zh-CN": "Chinese (China)",
"zh-HK": "Chinese (Hong Kong)",
"zh-TW": "Chinese (Taiwan)",
"zu": "Zulu"
}

View file

@ -1172,12 +1172,8 @@ fn search_suggestion(rp: RustyPipe) {
#[rstest]
fn search_suggestion_empty(rp: RustyPipe) {
let result = tokio_test::block_on(
rp.query()
.lang(Language::Th)
.search_suggestion("fjew327p4ifjelwfvnewg49"),
)
.unwrap();
let result =
tokio_test::block_on(rp.query().search_suggestion("fjew327p4ifjelwfvnewg49")).unwrap();
assert!(result.is_empty());
}