feat: add number_tokens for parsing large nums to dictionary

2022-09-23 15:04:22 +02:00 · 2022-09-23 15:04:22 +02:00 · 5d19259a14
commit 5d19259a14
parent 67ae1eb21d
21 changed files with 5219 additions and 38 deletions
--- a/codegen/src/gen_dictionary.rs
+++ b/codegen/src/gen_dictionary.rs
@@ -34,17 +34,47 @@ pub fn generate_dictionary(project_root: &Path) {
    let dict = util::read_dict(project_root);

    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
+// See codegen/src/gen_dictionary.rs for the generation code.
 use crate::{
    model::Language,
    timeago::{DateCmp, TaToken, TimeUnit},
 };

+/// The dictionary contains the information required to parse dates and numbers
+/// in all supported languages.
 pub struct Entry {
+    /// Should the language be parsed by character instead of by word?
+    /// (e.g. Chinese/Japanese)
    pub by_char: bool,
+    /// Tokens for parsing timeago strings.
+    ///
+    /// Format: Parsed token -> \[Quantity\] Identifier
+    ///
+    /// Identifiers: `Y`(ear), `M`(onth), `W`(eek), `D`(ay),
+    /// `h`(our), `m`(inute), `s`(econd)
    pub timeago_tokens: phf::Map<&'static str, TaToken>,
+    /// Order in which to parse numeric date components. Formatted as
+    /// a string of date identifiers (Y, M, D).
+    ///
+    /// Examples:
+    ///
+    /// - 03.01.2020 => `"DMY"`
+    /// - Jan 3, 2020 => `"DY"`
    pub date_order: &'static [DateCmp],
+    /// Tokens for parsing month names.
+    ///
+    /// Format: Parsed token -> Month number (starting from 1)
    pub months: phf::Map<&'static str, u8>,
+    /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
+    ///
+    /// Format: Parsed token -> \[Quantity\] Identifier
    pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
+    /// Are commas (instead of points) used as decimal separators?
+    pub comma_decimal: bool,
+    /// Tokens for parsing decimal prefixes (K, M, B, ...)
+    ///
+    /// Format: Parsed token -> decimal power
+    pub number_tokens: phf::Map<&'static str, u8>,
 }
 "#;

@@ -100,12 +130,19 @@ pub fn entry(lang: Language) -> Entry {
        });
        date_order = date_order.trim_end_matches([' ', ',']).to_owned() + "]";

+        // Number tokens
+        let mut number_tokens = phf_codegen::Map::<&str>::new();
+        entry.number_tokens.iter().for_each(|(txt, mag)| {
+            number_tokens.entry(txt, &mag.to_string());
+        });
+
        let code_ta_tokens = &ta_tokens.build().to_string().replace('\n', "\n            ");
        let code_ta_nd_tokens = &ta_nd_tokens.build().to_string().replace('\n', "\n            ");
        let code_months = &months.build().to_string().replace('\n', "\n            ");
+        let code_number_tokens = &number_tokens.build().to_string().replace('\n', "\n            ");

-        let _ = write!(code_timeago_tokens, "{} => Entry {{\n            by_char: {:?},\n            timeago_tokens: {},\n            date_order: {},\n            months: {},\n            timeago_nd_tokens: {},\n        }},\n        ",
-        selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens);
+        let _ = write!(code_timeago_tokens, "{} => Entry {{\n            by_char: {:?},\n            timeago_tokens: {},\n            date_order: {},\n            months: {},\n            timeago_nd_tokens: {},\n            comma_decimal: {:?},\n            number_tokens: {},\n        }},\n        ",
+        selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens);
    });

    code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n    }\n}\n";