rustypipe/codegen/src/gen_dictionary.rs

use std::fmt::Write;

use once_cell::sync::Lazy;
use path_macro::path;
use regex::Regex;

use crate::{
    model::TimeUnit,
    util::{self, SRC_DIR},
};

/// Parse a time unit specification from the dictionary.
///
/// A specification is an optional decimal quantity followed by an optional
/// one-character unit identifier: `s`(econd), `m`(inute), `h`(our), `D`(ay),
/// `W`(eek), `M`(onth) or `Y`(ear).
///
/// Returns the quantity (`1` when it is omitted) together with the unit
/// (`None` when it is omitted).
///
/// # Panics
///
/// Panics if `tu` does not match the expected pattern, names an unknown unit
/// or contains a quantity that cannot be parsed as a `u8`.
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
    static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());

    let cap = match TU_PATTERN.captures(tu) {
        Some(cap) => cap,
        None => panic!("invalid time unit: {tu}"),
    };

    // Only a missing quantity defaults to 1. A quantity that is present but
    // unparsable (e.g. `300D` overflows `u8`) is an error in the dictionary
    // and must not silently turn into 1.
    let n: u8 = match &cap[1] {
        "" => 1,
        digits => digits
            .parse()
            .unwrap_or_else(|_| panic!("invalid quantity in time unit: {tu}")),
    };

    let unit = match &cap[2] {
        "s" => Some(TimeUnit::Second),
        "m" => Some(TimeUnit::Minute),
        "h" => Some(TimeUnit::Hour),
        "D" => Some(TimeUnit::Day),
        "W" => Some(TimeUnit::Week),
        "M" => Some(TimeUnit::Month),
        "Y" => Some(TimeUnit::Year),
        "" => None,
        _ => panic!("invalid time unit: {tu}"),
    };

    (n, unit)
}

/// Render the `TaToken` literal for a time unit specification from the dictionary.
///
/// The specification is parsed with `parse_tu`, so this panics on the same
/// invalid inputs.
fn ta_token_code(tu_str: &str) -> String {
    let (n, unit) = parse_tu(tu_str);
    match unit {
        Some(unit) => format!("TaToken {{ n: {n}, unit: Some(TimeUnit::{unit:?}) }}"),
        None => format!("TaToken {{ n: {n}, unit: None }}"),
    }
}

/// Render a phf map as source code, indenting its continuation lines so that
/// they line up with the fields of the generated `Entry` literal.
fn map_code(map: &phf_codegen::Map<&str>) -> String {
    map.build().to_string().replace('\n', "\n            ")
}

/// Generate the `util/dictionary.rs` source file below `SRC_DIR`.
///
/// The dictionary is loaded with `util::read_dict`. The generated file
/// consists of a fixed header (the `Entry` struct definition) followed by an
/// `entry` function that matches on the language and returns the `Entry`
/// built from the dictionary data of that language.
///
/// # Panics
///
/// Panics if a time unit specification in the dictionary is invalid or if
/// the output file cannot be written.
pub fn generate_dictionary() {
    let dict = util::read_dict();

    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
// See codegen/gen_dictionary.rs for the generation code.
#![allow(clippy::unreadable_literal)]

//! The dictionary contains the information required to parse dates and numbers
//! in all supported languages.

use crate::{
    model::AlbumType,
    param::Language,
    util::timeago::{DateCmp, TaToken, TimeUnit},
};

/// Dictionary entry containing language-specific parsing information
pub(crate) struct Entry {
    /// Tokens for parsing timeago strings.
    ///
    /// Format: Parsed token -> \[Quantity\] Identifier
    ///
    /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
    /// `h`(our), `m`(inute), `s`(econd)
    pub timeago_tokens: phf::Map<&'static str, TaToken>,
    /// Order in which to parse numeric date components. Formatted as
    /// a string of date identifiers (Y, M, D).
    ///
    /// Examples:
    ///
    /// - 03.01.2020 => `"DMY"`
    /// - Jan 3, 2020 => `"DY"`
    pub date_order: &'static [DateCmp],
    /// Tokens for parsing month names.
    ///
    /// Format: Parsed token -> Month number (starting from 1)
    pub months: phf::Map<&'static str, u8>,
    /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
    ///
    /// Format: Parsed token -> \[Quantity\] Identifier
    pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
    /// Are commas (instead of points) used as decimal separators?
    pub comma_decimal: bool,
    /// Tokens for parsing decimal prefixes (K, M, B, ...)
    ///
    /// Format: Parsed token -> decimal power
    pub number_tokens: phf::Map<&'static str, u8>,
    /// Tokens for parsing number strings with no digits (e.g. "No videos")
    ///
    /// Format: Parsed token -> value
    pub number_nd_tokens: phf::Map<&'static str, u8>,
    /// Names of album types (Album, Single, ...)
    ///
    /// Format: Parsed text -> Album type
    pub album_types: phf::Map<&'static str, AlbumType>,
}
"#;

    // Source of the generated `entry` function; one match arm is appended per language.
    let mut code_entry_fn = r#"#[rustfmt::skip]
pub(crate) fn entry(lang: Language) -> Entry {
    match lang {
        "#
    .to_owned();

    for (lang, entry) in &dict {
        // Match selector: the language itself plus all languages sharing its entry
        let mut selector = format!("Language::{lang:?}");
        for eq in entry.equivalent.iter() {
            write!(selector, " | Language::{eq:?}").unwrap();
        }

        // Timeago tokens
        let mut ta_tokens = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in entry.timeago_tokens.iter() {
            ta_tokens.entry(txt, &ta_token_code(tu_str));
        }

        // Months
        let mut months = phf_codegen::Map::<&str>::new();
        for (txt, n_mon) in entry.months.iter() {
            months.entry(txt, &n_mon.to_string());
        }

        // Timeago(ND) tokens
        let mut ta_nd_tokens = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in entry.timeago_nd_tokens.iter() {
            ta_nd_tokens.entry(txt, &ta_token_code(tu_str));
        }

        // Date order: one `DateCmp` variant per identifier character
        let date_cmps: Vec<String> = entry
            .date_order
            .chars()
            .map(|c| format!("DateCmp::{c}"))
            .collect();
        let date_order = format!("&[{}]", date_cmps.join(", "));

        // Number tokens
        let mut number_tokens = phf_codegen::Map::<&str>::new();
        for (txt, mag) in entry.number_tokens.iter() {
            number_tokens.entry(txt, &mag.to_string());
        }

        // Number nd tokens
        let mut number_nd_tokens = phf_codegen::Map::<&str>::new();
        for (txt, mag) in entry.number_nd_tokens.iter() {
            number_nd_tokens.entry(txt, &mag.to_string());
        }

        // Album types
        let mut album_types = phf_codegen::Map::<&str>::new();
        for (txt, album_type) in entry.album_types.iter() {
            album_types.entry(txt, &format!("AlbumType::{album_type:?}"));
        }

        let code_ta_tokens = map_code(&ta_tokens);
        let code_ta_nd_tokens = map_code(&ta_nd_tokens);
        let code_months = map_code(&months);
        let code_number_tokens = map_code(&number_tokens);
        let code_number_nd_tokens = map_code(&number_nd_tokens);
        let code_album_types = map_code(&album_types);
        let comma_decimal = &entry.comma_decimal;

        // The trailing indentation positions the next match arm; it is
        // trimmed again after the last arm.
        write!(
            code_entry_fn,
            "{selector} => Entry {{\n            timeago_tokens: {code_ta_tokens},\n            date_order: {date_order},\n            months: {code_months},\n            timeago_nd_tokens: {code_ta_nd_tokens},\n            comma_decimal: {comma_decimal:?},\n            number_tokens: {code_number_tokens},\n            number_nd_tokens: {code_number_nd_tokens},\n            album_types: {code_album_types},\n        }},\n        "
        )
        .unwrap();
    }

    // Drop the indentation left after the last arm, then close `match` and the function.
    let trimmed_len = code_entry_fn.trim_end().len();
    code_entry_fn.truncate(trimmed_len);
    code_entry_fn.push_str("\n    }\n}\n");

    let code = format!("{code_head}\n{code_entry_fn}");

    let target_path = path!(*SRC_DIR / "util" / "dictionary.rs");
    std::fs::write(target_path, code).unwrap();
}