feat: add absolute dates/months to dictionary
This commit is contained in:
parent
c9433d721d
commit
d18f175aef
10 changed files with 9942 additions and 1834 deletions
|
|
@ -1,12 +1,19 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use std::{collections::BTreeMap, fs::File, path::Path};
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap},
|
||||
fs::File,
|
||||
hash::Hash,
|
||||
io::BufReader,
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
client::RustyTube,
|
||||
model::{locale::LANGUAGES, Country, Language},
|
||||
util,
|
||||
};
|
||||
|
||||
/// Date sample strings keyed by language and then by date case, in the shape
/// that `parse_months` deserializes from `testfiles/date/playlist_samples.json`.
type CollectedDates = BTreeMap<Language, BTreeMap<DateCase, String>>;
|
||||
|
|
@ -30,7 +37,7 @@ enum DateCase {
|
|||
Dec,
|
||||
}
|
||||
|
||||
#[test_log::test(tokio::test)]
|
||||
// #[test_log::test(tokio::test)]
|
||||
async fn collect_dates() {
|
||||
let json_path = Path::new("testfiles/date/playlist_samples.json").to_path_buf();
|
||||
if json_path.exists() {
|
||||
|
|
@ -44,7 +51,7 @@ async fn collect_dates() {
|
|||
),
|
||||
(DateCase::Yesterday, "PLmB6td997u3kUOrfFwkULZ910ho44oQSy"),
|
||||
(DateCase::Ago, "PL7zsB-C3aNu2yRY2869T0zj1FhtRIu5am"),
|
||||
(DateCase::Jan, "PL1J-6JOckZtHxTA3hN5SK7gBQaFfKzeXr"),
|
||||
(DateCase::Jan, "PL1J-6JOckZtFjcni6Xj1pLYglJp6JCpKD"),
|
||||
(DateCase::Feb, "PL1J-6JOckZtETrbzwZE7mRIIK6BzWNLAs"),
|
||||
(DateCase::Mar, "PL1J-6JOckZtG3AVdvBXhMO64mB2k3BtKi"),
|
||||
(DateCase::Apr, "PL1J-6JOckZtE_rUpK24S6X5hOE4eQoprN"),
|
||||
|
|
@ -75,3 +82,102 @@ async fn collect_dates() {
|
|||
let file = File::create(json_path).unwrap();
|
||||
serde_json::to_writer_pretty(file, &collected_dates).unwrap();
|
||||
}
|
||||
|
||||
// #[test]
|
||||
fn parse_months() {
|
||||
let json_path = Path::new("testfiles/date/playlist_samples.json").to_path_buf();
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
let collected_dates: CollectedDates =
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
||||
let mut dict = super::read_dict();
|
||||
let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();
|
||||
|
||||
let months = [
|
||||
DateCase::Jan,
|
||||
DateCase::Feb,
|
||||
DateCase::Mar,
|
||||
DateCase::Apr,
|
||||
DateCase::May,
|
||||
DateCase::Jun,
|
||||
DateCase::Jul,
|
||||
DateCase::Aug,
|
||||
DateCase::Sep,
|
||||
DateCase::Oct,
|
||||
DateCase::Nov,
|
||||
DateCase::Dec,
|
||||
];
|
||||
|
||||
let dates: [(u32, u32, u32); 12] = [
|
||||
(2020, 1, 3),
|
||||
(2016, 2, 7),
|
||||
(2015, 3, 9),
|
||||
(2017, 4, 2),
|
||||
(2014, 5, 22),
|
||||
(2014, 6, 28),
|
||||
(2014, 7, 2),
|
||||
(2015, 8, 23),
|
||||
(2018, 9, 16),
|
||||
(2014, 10, 31),
|
||||
(2016, 11, 3),
|
||||
(2021, 12, 24),
|
||||
];
|
||||
|
||||
for lang in langs {
|
||||
let mut month_words: HashMap<String, usize> = HashMap::new();
|
||||
let mut num_order = "".to_owned();
|
||||
|
||||
months.iter().enumerate().for_each(|(n, m)| {
|
||||
let datestr = collected_dates.get(&lang).unwrap().get(m).unwrap();
|
||||
|
||||
// Get order of numbers
|
||||
let nums = util::parse_numeric_vec::<u32>(&datestr);
|
||||
let date = dates[n];
|
||||
|
||||
let this_num_order = nums
|
||||
.iter()
|
||||
.map(|n| {
|
||||
if n == &date.0 {
|
||||
"Y"
|
||||
} else if n == &date.1 {
|
||||
"M"
|
||||
} else if n == &date.2 {
|
||||
"D"
|
||||
} else {
|
||||
panic!("invalid number {} in {}", n, datestr);
|
||||
}
|
||||
})
|
||||
.collect::<String>();
|
||||
|
||||
if num_order == "" {
|
||||
num_order = this_num_order;
|
||||
} else {
|
||||
assert_eq!(this_num_order, num_order);
|
||||
}
|
||||
|
||||
// Insert words into the map
|
||||
let filtered_str = datestr
|
||||
.chars()
|
||||
.filter(|c| !c.is_ascii_digit())
|
||||
.collect::<String>();
|
||||
|
||||
filtered_str.split_whitespace().for_each(|word| {
|
||||
month_words
|
||||
.entry(word.to_owned())
|
||||
.and_modify(|e| *e = 0)
|
||||
.or_insert(n + 1);
|
||||
});
|
||||
});
|
||||
|
||||
let dict_entry = dict.entry(lang).or_default();
|
||||
dict_entry.date_order = num_order;
|
||||
dict_entry.months = month_words.iter().filter_map(|(word, m)| {
|
||||
if *m == 0 {
|
||||
None
|
||||
} else {
|
||||
Some((word.to_owned(), *m as u8))
|
||||
}
|
||||
}).collect();
|
||||
}
|
||||
|
||||
super::write_dict(&dict);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,31 +1,11 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
fmt::Debug,
|
||||
fs::File,
|
||||
io::{BufReader},
|
||||
};
|
||||
|
||||
use crate::{model::Language, timeago::TimeUnit};
|
||||
use crate::{timeago::TimeUnit};
|
||||
use fancy_regex::Regex;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::Deserialize;
|
||||
|
||||
const DICT_PATH: &str = "testfiles/date/dictionary.json";
|
||||
const TARGET_FILE: &str = "src/dictionary.rs";
|
||||
|
||||
type Dictionary = BTreeMap<Language, DictEntry>;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct DictEntry {
|
||||
#[serde(default)]
|
||||
equivalent: Vec<Language>,
|
||||
#[serde(default)]
|
||||
by_char: bool,
|
||||
timeago_tokens: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
|
||||
static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());
|
||||
match TU_PATTERN.captures(tu).unwrap() {
|
||||
|
|
@ -47,51 +27,62 @@ fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
|
|||
}
|
||||
}
|
||||
|
||||
fn read_dict() -> Dictionary {
|
||||
let json_file = File::open(DICT_PATH).unwrap();
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap()
|
||||
}
|
||||
|
||||
// #[test]
|
||||
fn generate_dictionary() {
|
||||
let dict = read_dict();
|
||||
let dict = super::read_dict();
|
||||
|
||||
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
|
||||
use crate::{
|
||||
model::Language,
|
||||
timeago::{TaToken, TimeUnit},
|
||||
};
|
||||
|
||||
pub struct Entry {
|
||||
pub timeago_tokens: phf::Map<&'static str, TaToken>,
|
||||
pub date_order: &'static str,
|
||||
pub months: phf::Map<&'static str, u8>,
|
||||
}
|
||||
"#;
|
||||
|
||||
let mut code_timeago_tokens = r#"#[rustfmt::skip]
|
||||
pub(crate) fn get_timeago_tokens(lang: Language) -> phf::Map<&'static str, TaToken> {
|
||||
pub fn entry(lang: Language) -> Entry {
|
||||
match lang {
|
||||
"#
|
||||
.to_owned();
|
||||
|
||||
dict.iter().for_each(|(lang, entry)| {
|
||||
// Create a map for the language
|
||||
let mut map = phf_codegen::Map::<&str>::new();
|
||||
|
||||
entry.timeago_tokens.iter().for_each(|(txt, tu_str)| {
|
||||
let (n, unit) = parse_tu(&tu_str);
|
||||
match unit {
|
||||
Some(unit) => map.entry(
|
||||
&txt,
|
||||
&format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit),
|
||||
),
|
||||
None => map.entry(&txt, &format!("TaToken {{ n: {}, unit: None }}", n)),
|
||||
};
|
||||
});
|
||||
|
||||
// Match selector
|
||||
let mut selector = format!("Language::{:?}", lang);
|
||||
entry.equivalent.iter().for_each(|eq| {
|
||||
selector += &format!(" | Language::{:?}", eq);
|
||||
});
|
||||
|
||||
let code_map = &map.build().to_string().replace('\n', "\n ");
|
||||
// Timeago tokens
|
||||
let mut ta_tokens = phf_codegen::Map::<&str>::new();
|
||||
entry.timeago_tokens.iter().for_each(|(txt, tu_str)| {
|
||||
let (n, unit) = parse_tu(&tu_str);
|
||||
match unit {
|
||||
Some(unit) => ta_tokens.entry(
|
||||
&txt,
|
||||
&format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit),
|
||||
),
|
||||
None => ta_tokens.entry(&txt, &format!("TaToken {{ n: {}, unit: None }}", n)),
|
||||
};
|
||||
});
|
||||
|
||||
code_timeago_tokens += &format!("{} => {},\n ", selector, code_map);
|
||||
// Months
|
||||
let mut months = phf_codegen::Map::<&str>::new();
|
||||
entry.months.iter().for_each(|(txt, n_mon)| {
|
||||
months.entry(&txt, &n_mon.to_string());
|
||||
});
|
||||
|
||||
let code_ta_tokens = &ta_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_months = &months.build().to_string().replace('\n', "\n ");
|
||||
|
||||
code_timeago_tokens += &format!(
|
||||
"{} => Entry {{\n timeago_tokens: {},\n date_order: \"{}\",\n months: {},\n }},\n ",
|
||||
selector, code_ta_tokens, entry.date_order, code_months
|
||||
);
|
||||
});
|
||||
|
||||
code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n";
|
||||
|
|
|
|||
|
|
@ -1,4 +1,34 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use std::{collections::BTreeMap, fs::File, io::BufReader};
|
||||
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
use crate::model::Language;
|
||||
mod collect_playlist_dates;
|
||||
mod gen_dictionary;
|
||||
mod gen_locales;
|
||||
|
||||
/// Path of the JSON dictionary file that `read_dict` loads and `write_dict` overwrites.
const DICT_PATH: &str = "testfiles/date/dictionary.json";
|
||||
|
||||
/// The dictionary as stored in the JSON file: one [`DictEntry`] per language.
type Dictionary = BTreeMap<Language, DictEntry>;
|
||||
|
||||
/// Dictionary data of a single language.
///
/// Because of the container-level `#[serde(default)]`, any field missing from
/// the JSON input is filled with its `Default` value when deserializing.
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
struct DictEntry {
|
||||
/// Further languages that share this entry; `generate_dictionary` adds them
/// to the same `match` arm as the entry's own language.
equivalent: Vec<Language>,
|
||||
/// NOTE(review): not read by any code visible here; presumably marks
/// languages whose text has to be tokenized per character. Confirm.
by_char: bool,
|
||||
/// Maps a token text to a time-unit string; `generate_dictionary` passes
/// each value to `parse_tu`.
timeago_tokens: BTreeMap<String, String>,
|
||||
/// Order of the numeric date components as a string of `Y`, `M` and `D`
/// letters, as filled in by `parse_months`.
date_order: String,
|
||||
/// Maps a month word to its month number (`parse_months` writes 1 to 12).
months: BTreeMap<String, u8>,
|
||||
}
|
||||
|
||||
fn read_dict() -> Dictionary {
|
||||
let json_file = File::open(DICT_PATH).unwrap();
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap()
|
||||
}
|
||||
|
||||
fn write_dict(dict: &Dictionary) {
|
||||
let json_file = File::create(DICT_PATH).unwrap();
|
||||
serde_json::to_writer_pretty(json_file, dict).unwrap();
|
||||
}
|
||||
|
|
|
|||
Reference in a new issue