From e1843416255d26eb91f582f4be33adcc79d98c84 Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Thu, 11 May 2023 17:00:01 +0200 Subject: [PATCH] fix: improve language docs + string parsing --- codegen/src/gen_locales.rs | 48 +++++++-- src/param/locale.rs | 178 +++++++++++++++++-------------- src/util/mod.rs | 12 +++ testfiles/dict/gen_lang_names.js | 16 +++ testfiles/dict/lang_names.json | 85 +++++++++++++++ tests/youtube.rs | 8 +- 6 files changed, 255 insertions(+), 92 deletions(-) create mode 100644 testfiles/dict/gen_lang_names.js create mode 100644 testfiles/dict/lang_names.json diff --git a/codegen/src/gen_locales.rs b/codegen/src/gen_locales.rs index bcdeb70..b31a113 100644 --- a/codegen/src/gen_locales.rs +++ b/codegen/src/gen_locales.rs @@ -1,5 +1,7 @@ use std::collections::BTreeMap; use std::fmt::Write; +use std::fs::File; +use std::io::BufReader; use path_macro::path; use reqwest::header; @@ -9,6 +11,7 @@ use serde_with::serde_as; use serde_with::VecSkipError; use crate::model::Text; +use crate::util::DICT_DIR; use crate::util::SRC_DIR; #[serde_as] @@ -141,16 +144,41 @@ struct LanguageCountryCommand { pub async fn generate_locales() { let (languages, countries) = get_locales().await; + let json_path = path!(*DICT_DIR / "lang_names.json"); + let json_file = File::open(json_path).unwrap(); + let lang_names: BTreeMap = + serde_json::from_reader(BufReader::new(json_file)).unwrap(); + let code_head = r#"// This file is automatically generated. DO NOT EDIT. //! Languages and countries +use std::str::FromStr; + use serde::{Deserialize, Serialize}; use crate::error::Error; "#; - let code_foot = r#"serde_plain::derive_fromstr_from_deserialize!(Language, Error); + let code_foot = r#"impl FromStr for Language { + type Err = Error; + + fn from_str(s: &str) -> Result { + let mut sub = s; + loop { + if let Ok(v) = serde_plain::from_str(sub) { + return Ok(v); + } + match sub.rfind('-') { + Some(pos) => { + sub = &sub[..pos]; + } + None => return Err(Error::Other("could not parse language `{s}`".into())), + } + } + } +} + serde_plain::derive_display_from_serialize!(Language); serde_plain::derive_fromstr_from_deserialize!(Country, Error); @@ -199,8 +227,8 @@ pub enum Country { "# .to_owned(); - languages.iter().for_each(|(c, n)| { - let enum_name = c + languages.iter().for_each(|(code, native_name)| { + let enum_name = code .split('-') .map(|c| { format!( @@ -211,10 +239,16 @@ pub enum Country { }) .collect::(); + let en_name = lang_names.get(code).expect(code); + // Language enum - write!(code_langs, " /// {n}\n ").unwrap(); - if c.contains('-') { - write!(code_langs, "#[serde(rename = \"{c}\")]\n ").unwrap(); + if en_name == native_name || code.starts_with("en") { + write!(code_langs, " /// {native_name}\n ").unwrap(); + } else { + write!(code_langs, " /// {en_name} / {native_name}\n ").unwrap(); + } + if code.contains('-') { + write!(code_langs, "#[serde(rename = \"{code}\")]\n ").unwrap(); } code_langs += &enum_name; code_langs += ",\n"; @@ -225,7 +259,7 @@ pub enum Country { // Language names writeln!( code_lang_names, - " Language::{enum_name} => \"{n}\"," + " Language::{enum_name} => \"{native_name}\"," ) .unwrap(); }); diff --git a/src/param/locale.rs b/src/param/locale.rs index 5ac8975..a9f1be2 100644 --- a/src/param/locale.rs +++ b/src/param/locale.rs @@ -2,6 +2,8 @@ //! Languages and countries +use std::str::FromStr; + use serde::{Deserialize, Serialize}; use crate::error::Error; @@ -13,31 +15,31 @@ use crate::error::Error; pub enum Language { /// Afrikaans Af, - /// አማርኛ + /// Amharic / አማርኛ Am, - /// العربية + /// Arabic / العربية Ar, - /// অসমীয়া + /// Assamese / অসমীয়া As, - /// Azərbaycan + /// Azerbaijani / Azərbaycan Az, - /// Беларуская + /// Belarusian / Беларуская Be, - /// Български + /// Bulgarian / Български Bg, - /// বাংলা + /// Bangla / বাংলা Bn, - /// Bosanski + /// Bosnian / Bosanski Bs, - /// Català + /// Catalan / Català Ca, - /// Čeština + /// Czech / Čeština Cs, - /// Dansk + /// Danish / Dansk Da, - /// Deutsch + /// German / Deutsch De, - /// Ελληνικά + /// Greek / Ελληνικά El, /// English (US) En, @@ -47,145 +49,145 @@ pub enum Language { /// English (India) #[serde(rename = "en-IN")] EnIn, - /// Español (España) + /// Spanish / Español (España) Es, - /// Español (Latinoamérica) + /// Latin American Spanish / Español (Latinoamérica) #[serde(rename = "es-419")] Es419, - /// Español (US) + /// Spanish (United States) / Español (US) #[serde(rename = "es-US")] EsUs, - /// Eesti + /// Estonian / Eesti Et, - /// Euskara + /// Basque / Euskara Eu, - /// فارسی + /// Persian / فارسی Fa, - /// Suomi + /// Finnish / Suomi Fi, /// Filipino Fil, - /// Français + /// French / Français Fr, - /// Français (Canada) + /// Canadian French / Français (Canada) #[serde(rename = "fr-CA")] FrCa, - /// Galego + /// Galician / Galego Gl, - /// ગુજરાતી + /// Gujarati / ગુજરાતી Gu, - /// हिन्दी + /// Hindi / हिन्दी Hi, - /// Hrvatski + /// Croatian / Hrvatski Hr, - /// Magyar + /// Hungarian / Magyar Hu, - /// Հայերեն + /// Armenian / Հայերեն Hy, - /// Bahasa Indonesia + /// Indonesian / Bahasa Indonesia Id, - /// Íslenska + /// Icelandic / Íslenska Is, - /// Italiano + /// Italian / Italiano It, - /// עברית + /// Hebrew / עברית Iw, - /// 日本語 + /// Japanese / 日本語 Ja, - /// ქართული + /// Georgian / ქართული Ka, - /// Қазақ Тілі + /// Kazakh / Қазақ Тілі Kk, - /// ខ្មែរ + /// Khmer / ខ្មែរ Km, - /// ಕನ್ನಡ + /// Kannada / ಕನ್ನಡ Kn, - /// 한국어 + /// Korean / 한국어 Ko, - /// Кыргызча + /// Kyrgyz / Кыргызча Ky, - /// ລາວ + /// Lao / ລາວ Lo, - /// Lietuvių + /// Lithuanian / Lietuvių Lt, - /// Latviešu valoda + /// Latvian / Latviešu valoda Lv, - /// Македонски + /// Macedonian / Македонски Mk, - /// മലയാളം + /// Malayalam / മലയാളം Ml, - /// Монгол + /// Mongolian / Монгол Mn, - /// मराठी + /// Marathi / मराठी Mr, - /// Bahasa Malaysia + /// Malay / Bahasa Malaysia Ms, - /// ဗမာ + /// Burmese / ဗမာ My, - /// नेपाली + /// Nepali / नेपाली Ne, - /// Nederlands + /// Dutch / Nederlands Nl, - /// Norsk + /// Norwegian / Norsk No, - /// ଓଡ଼ିଆ + /// Odia / ଓଡ଼ିଆ Or, - /// ਪੰਜਾਬੀ + /// Punjabi / ਪੰਜਾਬੀ Pa, - /// Polski + /// Polish / Polski Pl, - /// Português (Brasil) + /// Portuguese / Português (Brasil) Pt, - /// Português + /// European Portuguese / Português #[serde(rename = "pt-PT")] PtPt, - /// Română + /// Romanian / Română Ro, - /// Русский + /// Russian / Русский Ru, - /// සිංහල + /// Sinhala / සිංහල Si, - /// Slovenčina + /// Slovak / Slovenčina Sk, - /// Slovenščina + /// Slovenian / Slovenščina Sl, - /// Shqip + /// Albanian / Shqip Sq, - /// Српски + /// Serbian / Српски Sr, - /// Srpski + /// Serbian (Latin) / Srpski #[serde(rename = "sr-Latn")] SrLatn, - /// Svenska + /// Swedish / Svenska Sv, - /// Kiswahili + /// Swahili / Kiswahili Sw, - /// தமிழ் + /// Tamil / தமிழ் Ta, - /// తెలుగు + /// Telugu / తెలుగు Te, - /// ภาษาไทย + /// Thai / ภาษาไทย Th, - /// Türkçe + /// Turkish / Türkçe Tr, - /// Українська + /// Ukrainian / Українська Uk, - /// اردو + /// Urdu / اردو Ur, - /// O‘zbek + /// Uzbek / O‘zbek Uz, - /// Tiếng Việt + /// Vietnamese / Tiếng Việt Vi, - /// 中文 (简体) + /// Chinese (China) / 中文 (简体) #[serde(rename = "zh-CN")] ZhCn, - /// 中文 (香港) + /// Chinese (Hong Kong) / 中文 (香港) #[serde(rename = "zh-HK")] ZhHk, - /// 中文 (繁體) + /// Chinese (Taiwan) / 中文 (繁體) #[serde(rename = "zh-TW")] ZhTw, - /// IsiZulu + /// Zulu / IsiZulu Zu, } @@ -829,7 +831,25 @@ impl Country { } } -serde_plain::derive_fromstr_from_deserialize!(Language, Error); +impl FromStr for Language { + type Err = Error; + + fn from_str(s: &str) -> Result { + let mut sub = s; + loop { + if let Ok(v) = serde_plain::from_str(sub) { + return Ok(v); + } + match sub.rfind('-') { + Some(pos) => { + sub = &sub[..pos]; + } + None => return Err(Error::Other("could not parse language `{s}`".into())), + } + } + } +} + serde_plain::derive_display_from_serialize!(Language); serde_plain::derive_fromstr_from_deserialize!(Country, Error); diff --git a/src/util/mod.rs b/src/util/mod.rs index e794681..f77e600 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -618,4 +618,16 @@ pub(crate) mod tests { let res_str = res.join(" "); assert_eq!(res_str, teststr) } + + #[rstest] + #[case("en", Some(Language::En))] + #[case("en-GB", Some(Language::EnGb))] + #[case("en-US", Some(Language::En))] + #[case("en-ZZ", Some(Language::En))] + #[case("xy", None)] + #[case("xy-ZZ", None)] + fn parse_language(#[case] s: &str, #[case] expect: Option) { + let res = Language::from_str(s).ok(); + assert_eq!(res, expect); + } } diff --git a/testfiles/dict/gen_lang_names.js b/testfiles/dict/gen_lang_names.js new file mode 100644 index 0000000..8dde523 --- /dev/null +++ b/testfiles/dict/gen_lang_names.js @@ -0,0 +1,16 @@ +const fs = require("fs"); + +const dict = JSON.parse(fs.readFileSync("dictionary.json")); + +const intl = new Intl.DisplayNames(["en"], { type: "language" }); + +let langs = Object.keys(dict); +Object.values(dict).forEach(entry => { + if (entry.equivalent) { + langs.push(...entry.equivalent); + } +}); +langs.sort(); + +const res = Object.fromEntries(langs.map((l) => [l, intl.of(l)])); +fs.writeFileSync("lang_names.json", JSON.stringify(res, null, 2)); diff --git a/testfiles/dict/lang_names.json b/testfiles/dict/lang_names.json new file mode 100644 index 0000000..730ff2e --- /dev/null +++ b/testfiles/dict/lang_names.json @@ -0,0 +1,85 @@ +{ + "af": "Afrikaans", + "am": "Amharic", + "ar": "Arabic", + "as": "Assamese", + "az": "Azerbaijani", + "be": "Belarusian", + "bg": "Bulgarian", + "bn": "Bangla", + "bs": "Bosnian", + "ca": "Catalan", + "cs": "Czech", + "da": "Danish", + "de": "German", + "el": "Greek", + "en": "English", + "en-GB": "British English", + "en-IN": "English (India)", + "es": "Spanish", + "es-419": "Latin American Spanish", + "es-US": "Spanish (United States)", + "et": "Estonian", + "eu": "Basque", + "fa": "Persian", + "fi": "Finnish", + "fil": "Filipino", + "fr": "French", + "fr-CA": "Canadian French", + "gl": "Galician", + "gu": "Gujarati", + "hi": "Hindi", + "hr": "Croatian", + "hu": "Hungarian", + "hy": "Armenian", + "id": "Indonesian", + "is": "Icelandic", + "it": "Italian", + "iw": "Hebrew", + "ja": "Japanese", + "ka": "Georgian", + "kk": "Kazakh", + "km": "Khmer", + "kn": "Kannada", + "ko": "Korean", + "ky": "Kyrgyz", + "lo": "Lao", + "lt": "Lithuanian", + "lv": "Latvian", + "mk": "Macedonian", + "ml": "Malayalam", + "mn": "Mongolian", + "mr": "Marathi", + "ms": "Malay", + "my": "Burmese", + "ne": "Nepali", + "nl": "Dutch", + "no": "Norwegian", + "or": "Odia", + "pa": "Punjabi", + "pl": "Polish", + "pt": "Portuguese", + "pt-PT": "European Portuguese", + "ro": "Romanian", + "ru": "Russian", + "si": "Sinhala", + "sk": "Slovak", + "sl": "Slovenian", + "sq": "Albanian", + "sr": "Serbian", + "sr-Latn": "Serbian (Latin)", + "sv": "Swedish", + "sw": "Swahili", + "ta": "Tamil", + "te": "Telugu", + "th": "Thai", + "tr": "Turkish", + "uk": "Ukrainian", + "ur": "Urdu", + "uz": "Uzbek", + "vi": "Vietnamese", + "zh-CN": "Chinese (China)", + "zh-HK": "Chinese (Hong Kong)", + "zh-TW": "Chinese (Taiwan)", + "zu": "Zulu" +} diff --git a/tests/youtube.rs b/tests/youtube.rs index 3660e84..8d71802 100644 --- a/tests/youtube.rs +++ b/tests/youtube.rs @@ -1172,12 +1172,8 @@ fn search_suggestion(rp: RustyPipe) { #[rstest] fn search_suggestion_empty(rp: RustyPipe) { - let result = tokio_test::block_on( - rp.query() - .lang(Language::Th) - .search_suggestion("fjew327p4ifjelwfvnewg49"), - ) - .unwrap(); + let result = + tokio_test::block_on(rp.query().search_suggestion("fjew327p4ifjelwfvnewg49")).unwrap(); assert!(result.is_empty()); }