fix: improve language docs + string parsing
This commit is contained in:
parent
86775ea95b
commit
e184341625
6 changed files with 255 additions and 92 deletions
|
|
@ -1,5 +1,7 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::fmt::Write;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use path_macro::path;
|
||||
use reqwest::header;
|
||||
|
|
@ -9,6 +11,7 @@ use serde_with::serde_as;
|
|||
use serde_with::VecSkipError;
|
||||
|
||||
use crate::model::Text;
|
||||
use crate::util::DICT_DIR;
|
||||
use crate::util::SRC_DIR;
|
||||
|
||||
#[serde_as]
|
||||
|
|
@ -141,16 +144,41 @@ struct LanguageCountryCommand {
|
|||
pub async fn generate_locales() {
|
||||
let (languages, countries) = get_locales().await;
|
||||
|
||||
let json_path = path!(*DICT_DIR / "lang_names.json");
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
let lang_names: BTreeMap<String, String> =
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
||||
|
||||
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
|
||||
|
||||
//! Languages and countries
|
||||
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::Error;
|
||||
"#;
|
||||
|
||||
let code_foot = r#"serde_plain::derive_fromstr_from_deserialize!(Language, Error);
|
||||
let code_foot = r#"impl FromStr for Language {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let mut sub = s;
|
||||
loop {
|
||||
if let Ok(v) = serde_plain::from_str(sub) {
|
||||
return Ok(v);
|
||||
}
|
||||
match sub.rfind('-') {
|
||||
Some(pos) => {
|
||||
sub = &sub[..pos];
|
||||
}
|
||||
None => return Err(Error::Other("could not parse language `{s}`".into())),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
serde_plain::derive_display_from_serialize!(Language);
|
||||
|
||||
serde_plain::derive_fromstr_from_deserialize!(Country, Error);
|
||||
|
|
@ -199,8 +227,8 @@ pub enum Country {
|
|||
"#
|
||||
.to_owned();
|
||||
|
||||
languages.iter().for_each(|(c, n)| {
|
||||
let enum_name = c
|
||||
languages.iter().for_each(|(code, native_name)| {
|
||||
let enum_name = code
|
||||
.split('-')
|
||||
.map(|c| {
|
||||
format!(
|
||||
|
|
@ -211,10 +239,16 @@ pub enum Country {
|
|||
})
|
||||
.collect::<String>();
|
||||
|
||||
let en_name = lang_names.get(code).expect(code);
|
||||
|
||||
// Language enum
|
||||
write!(code_langs, " /// {n}\n ").unwrap();
|
||||
if c.contains('-') {
|
||||
write!(code_langs, "#[serde(rename = \"{c}\")]\n ").unwrap();
|
||||
if en_name == native_name || code.starts_with("en") {
|
||||
write!(code_langs, " /// {native_name}\n ").unwrap();
|
||||
} else {
|
||||
write!(code_langs, " /// {en_name} / {native_name}\n ").unwrap();
|
||||
}
|
||||
if code.contains('-') {
|
||||
write!(code_langs, "#[serde(rename = \"{code}\")]\n ").unwrap();
|
||||
}
|
||||
code_langs += &enum_name;
|
||||
code_langs += ",\n";
|
||||
|
|
@ -225,7 +259,7 @@ pub enum Country {
|
|||
// Language names
|
||||
writeln!(
|
||||
code_lang_names,
|
||||
" Language::{enum_name} => \"{n}\","
|
||||
" Language::{enum_name} => \"{native_name}\","
|
||||
)
|
||||
.unwrap();
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
//! Languages and countries
|
||||
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::Error;
|
||||
|
|
@ -13,31 +15,31 @@ use crate::error::Error;
|
|||
pub enum Language {
|
||||
/// Afrikaans
|
||||
Af,
|
||||
/// አማርኛ
|
||||
/// Amharic / አማርኛ
|
||||
Am,
|
||||
/// العربية
|
||||
/// Arabic / العربية
|
||||
Ar,
|
||||
/// অসমীয়া
|
||||
/// Assamese / অসমীয়া
|
||||
As,
|
||||
/// Azərbaycan
|
||||
/// Azerbaijani / Azərbaycan
|
||||
Az,
|
||||
/// Беларуская
|
||||
/// Belarusian / Беларуская
|
||||
Be,
|
||||
/// Български
|
||||
/// Bulgarian / Български
|
||||
Bg,
|
||||
/// বাংলা
|
||||
/// Bangla / বাংলা
|
||||
Bn,
|
||||
/// Bosanski
|
||||
/// Bosnian / Bosanski
|
||||
Bs,
|
||||
/// Català
|
||||
/// Catalan / Català
|
||||
Ca,
|
||||
/// Čeština
|
||||
/// Czech / Čeština
|
||||
Cs,
|
||||
/// Dansk
|
||||
/// Danish / Dansk
|
||||
Da,
|
||||
/// Deutsch
|
||||
/// German / Deutsch
|
||||
De,
|
||||
/// Ελληνικά
|
||||
/// Greek / Ελληνικά
|
||||
El,
|
||||
/// English (US)
|
||||
En,
|
||||
|
|
@ -47,145 +49,145 @@ pub enum Language {
|
|||
/// English (India)
|
||||
#[serde(rename = "en-IN")]
|
||||
EnIn,
|
||||
/// Español (España)
|
||||
/// Spanish / Español (España)
|
||||
Es,
|
||||
/// Español (Latinoamérica)
|
||||
/// Latin American Spanish / Español (Latinoamérica)
|
||||
#[serde(rename = "es-419")]
|
||||
Es419,
|
||||
/// Español (US)
|
||||
/// Spanish (United States) / Español (US)
|
||||
#[serde(rename = "es-US")]
|
||||
EsUs,
|
||||
/// Eesti
|
||||
/// Estonian / Eesti
|
||||
Et,
|
||||
/// Euskara
|
||||
/// Basque / Euskara
|
||||
Eu,
|
||||
/// فارسی
|
||||
/// Persian / فارسی
|
||||
Fa,
|
||||
/// Suomi
|
||||
/// Finnish / Suomi
|
||||
Fi,
|
||||
/// Filipino
|
||||
Fil,
|
||||
/// Français
|
||||
/// French / Français
|
||||
Fr,
|
||||
/// Français (Canada)
|
||||
/// Canadian French / Français (Canada)
|
||||
#[serde(rename = "fr-CA")]
|
||||
FrCa,
|
||||
/// Galego
|
||||
/// Galician / Galego
|
||||
Gl,
|
||||
/// ગુજરાતી
|
||||
/// Gujarati / ગુજરાતી
|
||||
Gu,
|
||||
/// हिन्दी
|
||||
/// Hindi / हिन्दी
|
||||
Hi,
|
||||
/// Hrvatski
|
||||
/// Croatian / Hrvatski
|
||||
Hr,
|
||||
/// Magyar
|
||||
/// Hungarian / Magyar
|
||||
Hu,
|
||||
/// Հայերեն
|
||||
/// Armenian / Հայերեն
|
||||
Hy,
|
||||
/// Bahasa Indonesia
|
||||
/// Indonesian / Bahasa Indonesia
|
||||
Id,
|
||||
/// Íslenska
|
||||
/// Icelandic / Íslenska
|
||||
Is,
|
||||
/// Italiano
|
||||
/// Italian / Italiano
|
||||
It,
|
||||
/// עברית
|
||||
/// Hebrew / עברית
|
||||
Iw,
|
||||
/// 日本語
|
||||
/// Japanese / 日本語
|
||||
Ja,
|
||||
/// ქართული
|
||||
/// Georgian / ქართული
|
||||
Ka,
|
||||
/// Қазақ Тілі
|
||||
/// Kazakh / Қазақ Тілі
|
||||
Kk,
|
||||
/// ខ្មែរ
|
||||
/// Khmer / ខ្មែរ
|
||||
Km,
|
||||
/// ಕನ್ನಡ
|
||||
/// Kannada / ಕನ್ನಡ
|
||||
Kn,
|
||||
/// 한국어
|
||||
/// Korean / 한국어
|
||||
Ko,
|
||||
/// Кыргызча
|
||||
/// Kyrgyz / Кыргызча
|
||||
Ky,
|
||||
/// ລາວ
|
||||
/// Lao / ລາວ
|
||||
Lo,
|
||||
/// Lietuvių
|
||||
/// Lithuanian / Lietuvių
|
||||
Lt,
|
||||
/// Latviešu valoda
|
||||
/// Latvian / Latviešu valoda
|
||||
Lv,
|
||||
/// Македонски
|
||||
/// Macedonian / Македонски
|
||||
Mk,
|
||||
/// മലയാളം
|
||||
/// Malayalam / മലയാളം
|
||||
Ml,
|
||||
/// Монгол
|
||||
/// Mongolian / Монгол
|
||||
Mn,
|
||||
/// मराठी
|
||||
/// Marathi / मराठी
|
||||
Mr,
|
||||
/// Bahasa Malaysia
|
||||
/// Malay / Bahasa Malaysia
|
||||
Ms,
|
||||
/// ဗမာ
|
||||
/// Burmese / ဗမာ
|
||||
My,
|
||||
/// नेपाली
|
||||
/// Nepali / नेपाली
|
||||
Ne,
|
||||
/// Nederlands
|
||||
/// Dutch / Nederlands
|
||||
Nl,
|
||||
/// Norsk
|
||||
/// Norwegian / Norsk
|
||||
No,
|
||||
/// ଓଡ଼ିଆ
|
||||
/// Odia / ଓଡ଼ିଆ
|
||||
Or,
|
||||
/// ਪੰਜਾਬੀ
|
||||
/// Punjabi / ਪੰਜਾਬੀ
|
||||
Pa,
|
||||
/// Polski
|
||||
/// Polish / Polski
|
||||
Pl,
|
||||
/// Português (Brasil)
|
||||
/// Portuguese / Português (Brasil)
|
||||
Pt,
|
||||
/// Português
|
||||
/// European Portuguese / Português
|
||||
#[serde(rename = "pt-PT")]
|
||||
PtPt,
|
||||
/// Română
|
||||
/// Romanian / Română
|
||||
Ro,
|
||||
/// Русский
|
||||
/// Russian / Русский
|
||||
Ru,
|
||||
/// සිංහල
|
||||
/// Sinhala / සිංහල
|
||||
Si,
|
||||
/// Slovenčina
|
||||
/// Slovak / Slovenčina
|
||||
Sk,
|
||||
/// Slovenščina
|
||||
/// Slovenian / Slovenščina
|
||||
Sl,
|
||||
/// Shqip
|
||||
/// Albanian / Shqip
|
||||
Sq,
|
||||
/// Српски
|
||||
/// Serbian / Српски
|
||||
Sr,
|
||||
/// Srpski
|
||||
/// Serbian (Latin) / Srpski
|
||||
#[serde(rename = "sr-Latn")]
|
||||
SrLatn,
|
||||
/// Svenska
|
||||
/// Swedish / Svenska
|
||||
Sv,
|
||||
/// Kiswahili
|
||||
/// Swahili / Kiswahili
|
||||
Sw,
|
||||
/// தமிழ்
|
||||
/// Tamil / தமிழ்
|
||||
Ta,
|
||||
/// తెలుగు
|
||||
/// Telugu / తెలుగు
|
||||
Te,
|
||||
/// ภาษาไทย
|
||||
/// Thai / ภาษาไทย
|
||||
Th,
|
||||
/// Türkçe
|
||||
/// Turkish / Türkçe
|
||||
Tr,
|
||||
/// Українська
|
||||
/// Ukrainian / Українська
|
||||
Uk,
|
||||
/// اردو
|
||||
/// Urdu / اردو
|
||||
Ur,
|
||||
/// O‘zbek
|
||||
/// Uzbek / O‘zbek
|
||||
Uz,
|
||||
/// Tiếng Việt
|
||||
/// Vietnamese / Tiếng Việt
|
||||
Vi,
|
||||
/// 中文 (简体)
|
||||
/// Chinese (China) / 中文 (简体)
|
||||
#[serde(rename = "zh-CN")]
|
||||
ZhCn,
|
||||
/// 中文 (香港)
|
||||
/// Chinese (Hong Kong) / 中文 (香港)
|
||||
#[serde(rename = "zh-HK")]
|
||||
ZhHk,
|
||||
/// 中文 (繁體)
|
||||
/// Chinese (Taiwan) / 中文 (繁體)
|
||||
#[serde(rename = "zh-TW")]
|
||||
ZhTw,
|
||||
/// IsiZulu
|
||||
/// Zulu / IsiZulu
|
||||
Zu,
|
||||
}
|
||||
|
||||
|
|
@ -829,7 +831,25 @@ impl Country {
|
|||
}
|
||||
}
|
||||
|
||||
serde_plain::derive_fromstr_from_deserialize!(Language, Error);
|
||||
impl FromStr for Language {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let mut sub = s;
|
||||
loop {
|
||||
if let Ok(v) = serde_plain::from_str(sub) {
|
||||
return Ok(v);
|
||||
}
|
||||
match sub.rfind('-') {
|
||||
Some(pos) => {
|
||||
sub = &sub[..pos];
|
||||
}
|
||||
None => return Err(Error::Other("could not parse language `{s}`".into())),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
serde_plain::derive_display_from_serialize!(Language);
|
||||
|
||||
serde_plain::derive_fromstr_from_deserialize!(Country, Error);
|
||||
|
|
|
|||
|
|
@ -618,4 +618,16 @@ pub(crate) mod tests {
|
|||
let res_str = res.join(" ");
|
||||
assert_eq!(res_str, teststr)
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case("en", Some(Language::En))]
|
||||
#[case("en-GB", Some(Language::EnGb))]
|
||||
#[case("en-US", Some(Language::En))]
|
||||
#[case("en-ZZ", Some(Language::En))]
|
||||
#[case("xy", None)]
|
||||
#[case("xy-ZZ", None)]
|
||||
fn parse_language(#[case] s: &str, #[case] expect: Option<Language>) {
|
||||
let res = Language::from_str(s).ok();
|
||||
assert_eq!(res, expect);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
16
testfiles/dict/gen_lang_names.js
Normal file
16
testfiles/dict/gen_lang_names.js
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
const fs = require("fs");
|
||||
|
||||
// Builds lang_names.json: a map from every language code found in dictionary.json
// to the display name that Intl.DisplayNames reports for it in the "en" locale.
const dictionary = JSON.parse(fs.readFileSync("dictionary.json"));
const displayNames = new Intl.DisplayNames(["en"], { type: "language" });

// Start with the top-level keys, then append the codes listed under each entry's
// `equivalent` field (presumably an array of language codes; verify against dictionary.json).
const codes = Object.keys(dictionary);
for (const entry of Object.values(dictionary)) {
  if (entry.equivalent) {
    codes.push(...entry.equivalent);
  }
}
codes.sort();

// Pair each code with its display name and write the result as 2-space indented JSON.
const pairs = codes.map((code) => [code, displayNames.of(code)]);
const names = Object.fromEntries(pairs);
fs.writeFileSync("lang_names.json", JSON.stringify(names, null, 2));
|
||||
85
testfiles/dict/lang_names.json
Normal file
85
testfiles/dict/lang_names.json
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
{
|
||||
"af": "Afrikaans",
|
||||
"am": "Amharic",
|
||||
"ar": "Arabic",
|
||||
"as": "Assamese",
|
||||
"az": "Azerbaijani",
|
||||
"be": "Belarusian",
|
||||
"bg": "Bulgarian",
|
||||
"bn": "Bangla",
|
||||
"bs": "Bosnian",
|
||||
"ca": "Catalan",
|
||||
"cs": "Czech",
|
||||
"da": "Danish",
|
||||
"de": "German",
|
||||
"el": "Greek",
|
||||
"en": "English",
|
||||
"en-GB": "British English",
|
||||
"en-IN": "English (India)",
|
||||
"es": "Spanish",
|
||||
"es-419": "Latin American Spanish",
|
||||
"es-US": "Spanish (United States)",
|
||||
"et": "Estonian",
|
||||
"eu": "Basque",
|
||||
"fa": "Persian",
|
||||
"fi": "Finnish",
|
||||
"fil": "Filipino",
|
||||
"fr": "French",
|
||||
"fr-CA": "Canadian French",
|
||||
"gl": "Galician",
|
||||
"gu": "Gujarati",
|
||||
"hi": "Hindi",
|
||||
"hr": "Croatian",
|
||||
"hu": "Hungarian",
|
||||
"hy": "Armenian",
|
||||
"id": "Indonesian",
|
||||
"is": "Icelandic",
|
||||
"it": "Italian",
|
||||
"iw": "Hebrew",
|
||||
"ja": "Japanese",
|
||||
"ka": "Georgian",
|
||||
"kk": "Kazakh",
|
||||
"km": "Khmer",
|
||||
"kn": "Kannada",
|
||||
"ko": "Korean",
|
||||
"ky": "Kyrgyz",
|
||||
"lo": "Lao",
|
||||
"lt": "Lithuanian",
|
||||
"lv": "Latvian",
|
||||
"mk": "Macedonian",
|
||||
"ml": "Malayalam",
|
||||
"mn": "Mongolian",
|
||||
"mr": "Marathi",
|
||||
"ms": "Malay",
|
||||
"my": "Burmese",
|
||||
"ne": "Nepali",
|
||||
"nl": "Dutch",
|
||||
"no": "Norwegian",
|
||||
"or": "Odia",
|
||||
"pa": "Punjabi",
|
||||
"pl": "Polish",
|
||||
"pt": "Portuguese",
|
||||
"pt-PT": "European Portuguese",
|
||||
"ro": "Romanian",
|
||||
"ru": "Russian",
|
||||
"si": "Sinhala",
|
||||
"sk": "Slovak",
|
||||
"sl": "Slovenian",
|
||||
"sq": "Albanian",
|
||||
"sr": "Serbian",
|
||||
"sr-Latn": "Serbian (Latin)",
|
||||
"sv": "Swedish",
|
||||
"sw": "Swahili",
|
||||
"ta": "Tamil",
|
||||
"te": "Telugu",
|
||||
"th": "Thai",
|
||||
"tr": "Turkish",
|
||||
"uk": "Ukrainian",
|
||||
"ur": "Urdu",
|
||||
"uz": "Uzbek",
|
||||
"vi": "Vietnamese",
|
||||
"zh-CN": "Chinese (China)",
|
||||
"zh-HK": "Chinese (Hong Kong)",
|
||||
"zh-TW": "Chinese (Taiwan)",
|
||||
"zu": "Zulu"
|
||||
}
|
||||
|
|
@ -1172,12 +1172,8 @@ fn search_suggestion(rp: RustyPipe) {
|
|||
|
||||
#[rstest]
|
||||
fn search_suggestion_empty(rp: RustyPipe) {
|
||||
let result = tokio_test::block_on(
|
||||
rp.query()
|
||||
.lang(Language::Th)
|
||||
.search_suggestion("fjew327p4ifjelwfvnewg49"),
|
||||
)
|
||||
.unwrap();
|
||||
let result =
|
||||
tokio_test::block_on(rp.query().search_suggestion("fjew327p4ifjelwfvnewg49")).unwrap();
|
||||
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
|
|
|||
Reference in a new issue