feat: add number_tokens to the dictionary for parsing large numbers

This commit is contained in:
ThetaDev 2022-09-23 15:04:22 +02:00
parent 67ae1eb21d
commit 5d19259a14
21 changed files with 5219 additions and 38 deletions

View file

@ -34,17 +34,47 @@ pub fn generate_dictionary(project_root: &Path) {
let dict = util::read_dict(project_root);
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
// See codegen/gen_dictionary.rs for the generation code.
use crate::{
model::Language,
timeago::{DateCmp, TaToken, TimeUnit},
};
/// The dictionary contains the information required to parse dates and numbers
/// in all supported languages.
pub struct Entry {
/// Should the language be parsed by character instead of by word?
/// (e.g. Chinese/Japanese)
pub by_char: bool,
/// Tokens for parsing timeago strings.
///
/// Format: Parsed token -> \[Quantity\] Identifier
///
/// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
/// `h`(our), `m`(inute), `s`(econd)
pub timeago_tokens: phf::Map<&'static str, TaToken>,
/// Order in which to parse numeric date components. Formatted as
/// a string of date identifiers (Y, M, D).
///
/// Examples:
///
/// - 03.01.2020 => `"DMY"`
/// - Jan 3, 2020 => `"DY"`
pub date_order: &'static [DateCmp],
/// Tokens for parsing month names.
///
/// Format: Parsed token -> Month number (starting from 1)
pub months: phf::Map<&'static str, u8>,
/// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
///
/// Format: Parsed token -> \[Quantity\] Identifier
pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
/// Are commas (instead of points) used as decimal separators?
pub comma_decimal: bool,
/// Tokens for parsing decimal prefixes (K, M, B, ...)
///
/// Format: Parsed token -> decimal power
pub number_tokens: phf::Map<&'static str, u8>,
}
"#;
@ -100,12 +130,19 @@ pub fn entry(lang: Language) -> Entry {
});
date_order = date_order.trim_end_matches([' ', ',']).to_owned() + "]";
// Number tokens
let mut number_tokens = phf_codegen::Map::<&str>::new();
entry.number_tokens.iter().for_each(|(txt, mag)| {
number_tokens.entry(txt, &mag.to_string());
});
let code_ta_tokens = &ta_tokens.build().to_string().replace('\n', "\n ");
let code_ta_nd_tokens = &ta_nd_tokens.build().to_string().replace('\n', "\n ");
let code_months = &months.build().to_string().replace('\n', "\n ");
let code_number_tokens = &number_tokens.build().to_string().replace('\n', "\n ");
let _ = write!(code_timeago_tokens, "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n }},\n ",
selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens);
let _ = write!(code_timeago_tokens, "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n }},\n ",
selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens);
});
code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n";