This repository has been archived on 2026-05-27. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
rustypipe/codegen/src/gen_dictionary.rs
ThetaDev 45e2d3c7c7 feat: multilanguage album type parsing
- new album types: Audiobook, Show
2022-11-01 18:10:28 +01:00

167 lines
6.4 KiB
Rust

use std::fmt::Write;
use std::path::Path;
use fancy_regex::Regex;
use once_cell::sync::Lazy;
use rustypipe::timeago::TimeUnit;
use crate::util;
/// Path of the generated dictionary source file, relative to the project root
/// that is passed to `generate_dictionary`.
const TARGET_PATH: &str = "src/util/dictionary.rs";
/// Parses a time unit specification from the dictionary.
///
/// A specification is an optional decimal quantity followed by an optional
/// single-character unit identifier, e.g. `"Y"`, `"2W"` or `"30"`.
///
/// Unit identifiers: `Y`(ear), `M`(onth), `W`(eek), `D`(ay),
/// `h`(our), `m`(inute), `s`(econd)
///
/// Returns the quantity (`1` if no digits are given) and the time unit
/// (`None` if no identifier is given).
///
/// # Panics
///
/// Panics if `tu` does not match the format described above, if the unit
/// identifier is unknown, or if the quantity cannot be parsed as a `u8`.
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
    static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());

    let cap = match TU_PATTERN.captures(tu).unwrap() {
        Some(cap) => cap,
        None => panic!("invalid time unit: {}", tu),
    };

    // Only an omitted quantity defaults to 1. A quantity that is present but
    // cannot be parsed (e.g. it overflows `u8`) is a dictionary error and must
    // not be silently replaced by 1.
    let n = match cap.get(1).unwrap().as_str() {
        "" => 1,
        digits => digits
            .parse::<u8>()
            .unwrap_or_else(|_| panic!("invalid quantity in time unit: {}", tu)),
    };

    let unit = match cap.get(2).unwrap().as_str() {
        "s" => Some(TimeUnit::Second),
        "m" => Some(TimeUnit::Minute),
        "h" => Some(TimeUnit::Hour),
        "D" => Some(TimeUnit::Day),
        "W" => Some(TimeUnit::Week),
        "M" => Some(TimeUnit::Month),
        "Y" => Some(TimeUnit::Year),
        "" => None,
        _ => panic!("invalid time unit: {}", tu),
    };

    (n, unit)
}
/// Generates the Rust source code of the dictionary module.
///
/// The dictionary is loaded with `util::read_dict` and rendered as the
/// definition of the `Entry` struct followed by an `entry` function that
/// returns the `Entry` matching a given `Language`. The rendered code is
/// written to `TARGET_PATH` below `project_root`.
///
/// # Panics
///
/// Panics if `parse_tu` rejects a time unit from the dictionary or if the
/// target file cannot be written.
pub fn generate_dictionary(project_root: &Path) {
    /// Renders the `TaToken` literal for a time unit specification.
    fn ta_token_code(tu_str: &str) -> String {
        match parse_tu(tu_str) {
            (n, Some(unit)) => {
                format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit)
            }
            (n, None) => format!("TaToken {{ n: {}, unit: None }}", n),
        }
    }

    /// Renders a phf map as source code, appending a space after every
    /// newline of the generated text.
    fn map_code(map: &phf_codegen::Map<&str>) -> String {
        map.build().to_string().replace('\n', "\n ")
    }

    let dict = util::read_dict(project_root);

    // Static part of the generated file: header comment, imports and the
    // definition of the `Entry` struct.
    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
// See codegen/gen_dictionary.rs for the generation code.
use crate::{
model::AlbumType,
param::Language,
timeago::{DateCmp, TaToken, TimeUnit},
};
/// The dictionary contains the information required to parse dates and numbers
/// in all supported languages.
pub(crate) struct Entry {
/// Should the language be parsed by character instead of by word?
/// (e.g. Chinese/Japanese)
pub by_char: bool,
/// Tokens for parsing timeago strings.
///
/// Format: Parsed token -> \[Quantity\] Identifier
///
/// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
/// `h`(our), `m`(inute), `s`(econd)
pub timeago_tokens: phf::Map<&'static str, TaToken>,
/// Order in which to parse numeric date components. Formatted as
/// a string of date identifiers (Y, M, D).
///
/// Examples:
///
/// - 03.01.2020 => `"DMY"`
/// - Jan 3, 2020 => `"DY"`
pub date_order: &'static [DateCmp],
/// Tokens for parsing month names.
///
/// Format: Parsed token -> Month number (starting from 1)
pub months: phf::Map<&'static str, u8>,
/// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
///
/// Format: Parsed token -> \[Quantity\] Identifier
pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
/// Are commas (instead of points) used as decimal separators?
pub comma_decimal: bool,
/// Tokens for parsing decimal prefixes (K, M, B, ...)
///
/// Format: Parsed token -> decimal power
pub number_tokens: phf::Map<&'static str, u8>,
/// Names of album types (Album, Single, ...)
///
/// Format: Parsed text -> Album type
pub album_types: phf::Map<&'static str, AlbumType>,
}
"#;

    // Dynamic part of the generated file: the `entry` function with one match
    // arm per dictionary entry.
    let mut code_entries = String::from(r#"#[rustfmt::skip]
pub(crate) fn entry(lang: Language) -> Entry {
match lang {
"#);

    for (lang, entry) in dict.iter() {
        // Match pattern: the language itself plus all equivalent languages
        let mut selector = format!("Language::{:?}", lang);
        for eq in entry.equivalent.iter() {
            let _ = write!(selector, " | Language::{:?}", eq);
        }

        // Timeago tokens
        let mut ta_tokens = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in entry.timeago_tokens.iter() {
            ta_tokens.entry(txt, &ta_token_code(tu_str));
        }

        // Month names
        let mut months = phf_codegen::Map::<&str>::new();
        for (txt, n_mon) in entry.months.iter() {
            months.entry(txt, &n_mon.to_string());
        }

        // Timeago tokens for date strings without digits
        let mut ta_nd_tokens = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in entry.timeago_nd_tokens.iter() {
            ta_nd_tokens.entry(txt, &ta_token_code(tu_str));
        }

        // Date order: slice literal of `DateCmp` variants. Every component is
        // written with a trailing separator, which is trimmed off afterwards.
        let mut date_cmps = String::from("&[");
        for c in entry.date_order.chars() {
            let _ = write!(date_cmps, "DateCmp::{}, ", c);
        }
        let date_order = date_cmps.trim_end_matches([' ', ',']).to_owned() + "]";

        // Number tokens
        let mut number_tokens = phf_codegen::Map::<&str>::new();
        for (txt, mag) in entry.number_tokens.iter() {
            number_tokens.entry(txt, &mag.to_string());
        }

        // Album types
        let mut album_types = phf_codegen::Map::<&str>::new();
        for (txt, album_type) in entry.album_types.iter() {
            album_types.entry(txt, &format!("AlbumType::{:?}", album_type));
        }

        let code_ta_tokens = map_code(&ta_tokens);
        let code_ta_nd_tokens = map_code(&ta_nd_tokens);
        let code_months = map_code(&months);
        let code_number_tokens = map_code(&number_tokens);
        let code_album_types = map_code(&album_types);

        // Writing to a `String` cannot fail, so the result is ignored.
        let _ = write!(
            code_entries,
            "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n album_types: {},\n }},\n ",
            selector,
            entry.by_char,
            code_ta_tokens,
            date_order,
            code_months,
            code_ta_nd_tokens,
            entry.comma_decimal,
            code_number_tokens,
            code_album_types
        );
    }

    // Drop the whitespace left behind by the last match arm, then close the
    // `match` expression and the function.
    let code_entries = code_entries.trim_end().to_owned() + "\n }\n}\n";
    let code = format!("{}\n{}", code_head, code_entries);

    std::fs::write(project_root.join(TARGET_PATH), code).unwrap();
}