feat: add number_tokens for parsing large nums to dictionary
This commit is contained in:
parent
67ae1eb21d
commit
5d19259a14
21 changed files with 5219 additions and 38 deletions
|
|
@ -34,17 +34,47 @@ pub fn generate_dictionary(project_root: &Path) {
|
|||
let dict = util::read_dict(project_root);
|
||||
|
||||
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
|
||||
// See codegen/gen_dictionary.rs for the generation code.
|
||||
use crate::{
|
||||
model::Language,
|
||||
timeago::{DateCmp, TaToken, TimeUnit},
|
||||
};
|
||||
|
||||
/// The dictionary contains the information required to parse dates and numbers
|
||||
/// in all supported languages.
|
||||
pub struct Entry {
|
||||
/// Should the language be parsed by character instead of by word?
|
||||
/// (e.g. Chinese/Japanese)
|
||||
pub by_char: bool,
|
||||
/// Tokens for parsing timeago strings.
|
||||
///
|
||||
/// Format: Parsed token -> \[Quantity\] Identifier
|
||||
///
|
||||
/// Identifiers: `Y`(ear), `M`(onth), `W`(eek), `D`(ay),
|
||||
/// `h`(our), `m`(inute), `s`(econd)
|
||||
pub timeago_tokens: phf::Map<&'static str, TaToken>,
|
||||
/// Order in which to parse numeric date components. Formatted as
|
||||
/// a string of date identifiers (Y, M, D).
|
||||
///
|
||||
/// Examples:
|
||||
///
|
||||
/// - 03.01.2020 => `"DMY"`
|
||||
/// - Jan 3, 2020 => `"DY"`
|
||||
pub date_order: &'static [DateCmp],
|
||||
/// Tokens for parsing month names.
|
||||
///
|
||||
/// Format: Parsed token -> Month number (starting from 1)
|
||||
pub months: phf::Map<&'static str, u8>,
|
||||
/// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
|
||||
///
|
||||
/// Format: Parsed token -> \[Quantity\] Identifier
|
||||
pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
|
||||
/// Are commas (instead of points) used as decimal separators?
|
||||
pub comma_decimal: bool,
|
||||
/// Tokens for parsing decimal prefixes (K, M, B, ...)
|
||||
///
|
||||
/// Format: Parsed token -> decimal power
|
||||
pub number_tokens: phf::Map<&'static str, u8>,
|
||||
}
|
||||
"#;
|
||||
|
||||
|
|
@ -100,12 +130,19 @@ pub fn entry(lang: Language) -> Entry {
|
|||
});
|
||||
date_order = date_order.trim_end_matches([' ', ',']).to_owned() + "]";
|
||||
|
||||
// Number tokens
|
||||
let mut number_tokens = phf_codegen::Map::<&str>::new();
|
||||
entry.number_tokens.iter().for_each(|(txt, mag)| {
|
||||
number_tokens.entry(txt, &mag.to_string());
|
||||
});
|
||||
|
||||
let code_ta_tokens = &ta_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_ta_nd_tokens = &ta_nd_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_months = &months.build().to_string().replace('\n', "\n ");
|
||||
let code_number_tokens = &number_tokens.build().to_string().replace('\n', "\n ");
|
||||
|
||||
let _ = write!(code_timeago_tokens, "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n }},\n ",
|
||||
selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens);
|
||||
let _ = write!(code_timeago_tokens, "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n }},\n ",
|
||||
selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens);
|
||||
});
|
||||
|
||||
code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n";
|
||||
|
|
|
|||
Reference in a new issue