rustypipe/codegen/src/gen_dictionary.rs

use std::fmt::Write;
use std::path::Path;

use fancy_regex::Regex;
use once_cell::sync::Lazy;
use rustypipe::timeago::TimeUnit;

use crate::util;

/// Location of the generated dictionary module, relative to the project root
/// that is passed to `generate_dictionary`.
const TARGET_PATH: &str = "src/util/dictionary.rs";

/// Parse a time unit specification into its quantity and unit.
///
/// The input consists of an optional decimal quantity followed by an optional
/// one-character unit identifier: `Y`(ear), `M`(onth), `W`(eek), `D`(ay),
/// `h`(our), `m`(inute), `s`(econd).
///
/// Returns `(quantity, unit)`. A missing quantity defaults to 1 and a missing
/// identifier yields `None` as the unit.
///
/// # Panics
///
/// Panics if `tu` does not have this format, uses an unknown identifier, or
/// carries a quantity that cannot be represented as a `u8`.
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
    static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());

    let cap = match TU_PATTERN.captures(tu).unwrap() {
        Some(cap) => cap,
        None => panic!("invalid time unit: {}", tu),
    };

    // Both groups take part in every successful match (possibly as empty
    // strings), so `get` cannot return `None` here.
    let quantity = match cap.get(1).unwrap().as_str() {
        // Only an absent quantity means "1". Anything else has to parse;
        // otherwise an overflowing value like "300h" would silently become 1.
        "" => 1,
        digits => digits
            .parse::<u8>()
            .unwrap_or_else(|_| panic!("invalid time unit quantity: {}", tu)),
    };

    let unit = match cap.get(2).unwrap().as_str() {
        "s" => Some(TimeUnit::Second),
        "m" => Some(TimeUnit::Minute),
        "h" => Some(TimeUnit::Hour),
        "D" => Some(TimeUnit::Day),
        "W" => Some(TimeUnit::Week),
        "M" => Some(TimeUnit::Month),
        "Y" => Some(TimeUnit::Year),
        "" => None,
        _ => panic!("invalid time unit: {}", tu),
    };

    (quantity, unit)
}

/// Render the Rust source of a `TaToken` literal for a time unit string.
///
/// The string is interpreted by `parse_tu`, so this panics on the same
/// invalid inputs.
fn ta_token_code(tu: &str) -> String {
    match parse_tu(tu) {
        (n, Some(unit)) => format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit),
        (n, None) => format!("TaToken {{ n: {}, unit: None }}", n),
    }
}

/// Render a code fragment and indent its continuation lines by 12 spaces so
/// that they line up with the fields of the generated `Entry` literal.
fn indent_field_code(code: impl std::fmt::Display) -> String {
    code.to_string().replace('\n', "\n            ")
}

/// Generate the dictionary module and write it to `TARGET_PATH` below
/// `project_root`.
///
/// The dictionary is loaded with `util::read_dict`. The generated file
/// consists of the `Entry` struct definition followed by an `entry` function
/// that maps every language (together with its equivalent languages) to an
/// `Entry` literal holding `phf` maps built from the dictionary data.
///
/// # Panics
///
/// Panics if a time unit string in the dictionary is rejected by `parse_tu`
/// or if the target file cannot be written.
pub fn generate_dictionary(project_root: &Path) {
    let dict = util::read_dict(project_root);

    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
// See codegen/gen_dictionary.rs for the generation code.
use crate::{
    model::AlbumType,
    param::Language,
    timeago::{DateCmp, TaToken, TimeUnit},
};

/// The dictionary contains the information required to parse dates and numbers
/// in all supported languages.
pub(crate) struct Entry {
    /// Should the language be parsed by character instead of by word?
    /// (e.g. Chinese/Japanese)
    pub by_char: bool,
    /// Tokens for parsing timeago strings.
    ///
    /// Format: Parsed token -> \[Quantity\] Identifier
    ///
    /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
    /// `h`(our), `m`(inute), `s`(econd)
    pub timeago_tokens: phf::Map<&'static str, TaToken>,
    /// Order in which to parse numeric date components. Formatted as
    /// a string of date identifiers (Y, M, D).
    ///
    /// Examples:
    ///
    /// - 03.01.2020 => `"DMY"`
    /// - Jan 3, 2020 => `"DY"`
    pub date_order: &'static [DateCmp],
    /// Tokens for parsing month names.
    ///
    /// Format: Parsed token -> Month number (starting from 1)
    pub months: phf::Map<&'static str, u8>,
    /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
    ///
    /// Format: Parsed token -> \[Quantity\] Identifier
    pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
    /// Are commas (instead of points) used as decimal separators?
    pub comma_decimal: bool,
    /// Tokens for parsing decimal prefixes (K, M, B, ...)
    ///
    /// Format: Parsed token -> decimal power
    pub number_tokens: phf::Map<&'static str, u8>,
    /// Names of album types (Album, Single, ...)
    ///
    /// Format: Parsed text -> Album type
    pub album_types: phf::Map<&'static str, AlbumType>,
}
"#;

    // Source of the generated `entry` function; one match arm is appended per
    // dictionary entry and the function is closed after the loop.
    let mut code_entry_fn = r#"#[rustfmt::skip]
pub(crate) fn entry(lang: Language) -> Entry {
    match lang {
        "#
    .to_owned();

    for (lang, entry) in dict.iter() {
        // Match selector: the language itself plus all languages sharing its entry.
        // (Formatting into a `String` cannot fail, so the results are ignored.)
        let mut selector = format!("Language::{:?}", lang);
        for eq in entry.equivalent.iter() {
            let _ = write!(selector, " | Language::{:?}", eq);
        }

        // Timeago tokens
        let mut ta_tokens = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in entry.timeago_tokens.iter() {
            ta_tokens.entry(txt, &ta_token_code(tu_str));
        }

        // Months
        let mut months = phf_codegen::Map::<&str>::new();
        for (txt, n_mon) in entry.months.iter() {
            months.entry(txt, &n_mon.to_string());
        }

        // Timeago(ND) tokens
        let mut ta_nd_tokens = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in entry.timeago_nd_tokens.iter() {
            ta_nd_tokens.entry(txt, &ta_token_code(tu_str));
        }

        // Date order: one `DateCmp` variant per identifier character.
        let date_order = format!(
            "&[{}]",
            entry
                .date_order
                .chars()
                .map(|c| format!("DateCmp::{}", c))
                .collect::<Vec<_>>()
                .join(", ")
        );

        // Number tokens
        let mut number_tokens = phf_codegen::Map::<&str>::new();
        for (txt, mag) in entry.number_tokens.iter() {
            number_tokens.entry(txt, &mag.to_string());
        }

        // Album types
        let mut album_types = phf_codegen::Map::<&str>::new();
        for (txt, album_type) in entry.album_types.iter() {
            album_types.entry(txt, &format!("AlbumType::{:?}", album_type));
        }

        // The trailing 8 spaces position the next match arm (or get trimmed
        // off again after the last one).
        let _ = write!(
            code_entry_fn,
            concat!(
                "{} => Entry {{\n",
                "            by_char: {:?},\n",
                "            timeago_tokens: {},\n",
                "            date_order: {},\n",
                "            months: {},\n",
                "            timeago_nd_tokens: {},\n",
                "            comma_decimal: {:?},\n",
                "            number_tokens: {},\n",
                "            album_types: {},\n",
                "        }},\n",
                "        ",
            ),
            selector,
            entry.by_char,
            indent_field_code(ta_tokens.build()),
            date_order,
            indent_field_code(months.build()),
            indent_field_code(ta_nd_tokens.build()),
            entry.comma_decimal,
            indent_field_code(number_tokens.build()),
            indent_field_code(album_types.build()),
        );
    }

    // Drop the indentation left after the last arm, then close `match` and `fn`.
    let code = format!("{}\n{}\n    }}\n}}\n", code_head, code_entry_fn.trim_end());

    let target_path = project_root.join(TARGET_PATH);
    std::fs::write(&target_path, code)
        .unwrap_or_else(|e| panic!("could not write {}: {}", target_path.display(), e));
}