fix: parsing history dates

This commit is contained in:
ThetaDev 2025-01-18 05:51:41 +01:00
parent 32fda234e4
commit af7dc10163
No known key found for this signature in database
GPG key ID: E319D3C5148D65B6
10 changed files with 2084 additions and 429 deletions

View file

@ -5,26 +5,25 @@ use rustypipe::{
client::RustyPipe,
param::{Language, LANGUAGES},
};
use serde::{Deserialize, Serialize};
use crate::util::{self, DICT_DIR};
type CollectedDates = BTreeMap<Language, HistoryDates>;
type CollectedDates = BTreeMap<Language, BTreeMap<String, String>>;
#[derive(Debug, Serialize, Deserialize)]
struct HistoryDates {
this_week: String,
last_week: String,
}
const THIS_WEEK: &str = "this_week";
const LAST_WEEK: &str = "last_week";
pub async fn collect_dates() {
pub async fn collect_dates_music() {
let json_path = path!(*DICT_DIR / "history_date_samples.json");
let rp = RustyPipe::builder()
.storage_dir("/home/thetadev/Documents/Programmieren/Rust/rustypipe")
.storage_dir(path!(env!("CARGO_MANIFEST_DIR") / ".."))
.build()
.unwrap();
let mut res: CollectedDates = BTreeMap::new();
let mut res: CollectedDates = {
let json_file = File::open(&json_path).unwrap();
serde_json::from_reader(BufReader::new(json_file)).unwrap()
};
for lang in LANGUAGES {
println!("{lang}");
@ -34,14 +33,56 @@ pub async fn collect_dates() {
}
// The indexes have to be adapted before running
let d = HistoryDates {
this_week: history.items[0].playback_date_txt.clone().unwrap(),
last_week: history.items[18].playback_date_txt.clone().unwrap(),
};
res.insert(lang, d);
let entry = res.entry(lang).or_default();
entry.insert(
THIS_WEEK.to_owned(),
history.items[0].playback_date_txt.clone().unwrap(),
);
entry.insert(
LAST_WEEK.to_owned(),
history.items[18].playback_date_txt.clone().unwrap(),
);
}
let file = File::create(json_path).unwrap();
let file = File::create(&json_path).unwrap();
serde_json::to_writer_pretty(file, &res).unwrap();
}
pub async fn collect_dates() {
let json_path = path!(*DICT_DIR / "history_date_samples.json");
let rp = RustyPipe::builder()
.storage_dir(path!(env!("CARGO_MANIFEST_DIR") / ".."))
.build()
.unwrap();
let mut res: CollectedDates = {
let json_file = File::open(&json_path).unwrap();
serde_json::from_reader(BufReader::new(json_file)).unwrap()
};
for lang in LANGUAGES {
println!("{lang}");
let history = rp.query().lang(lang).history().await.unwrap();
if history.items.len() < 3 {
panic!("{lang} empty history")
}
let entry = res.entry(lang).or_default();
entry.insert(
"tuesday".to_owned(),
history.items[0].playback_date_txt.clone().unwrap(),
);
entry.insert(
"0000-01-06".to_owned(),
history.items[1].playback_date_txt.clone().unwrap(),
);
entry.insert(
"2024-12-28".to_owned(),
history.items[15].playback_date_txt.clone().unwrap(),
);
}
let file = File::create(&json_path).unwrap();
serde_json::to_writer_pretty(file, &res).unwrap();
}
@ -59,10 +100,10 @@ pub fn write_samples_to_dict() {
let cd = &collected_dates[&lang];
dict_entry
.timeago_nd_tokens
.insert(util::filter_datestr(&cd.this_week), "0Wl".to_owned());
.insert(util::filter_datestr(&cd[THIS_WEEK]), "0Wl".to_owned());
dict_entry
.timeago_nd_tokens
.insert(util::filter_datestr(&cd.last_week), "1Wl".to_owned());
.insert(util::filter_datestr(&cd[LAST_WEEK]), "1Wl".to_owned());
}
util::write_dict(dict);

View file

@ -45,7 +45,7 @@ pub fn generate_dictionary() {
use crate::{
model::AlbumType,
param::Language,
util::timeago::{DateCmp, TaToken, TimeUnit},
util::timeago::{TaToken, TimeUnit},
};
/// Dictionary entry containing language-specific parsing information
@ -57,14 +57,13 @@ pub(crate) struct Entry {
/// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
/// `h`(our), `m`(inute), `s`(econd)
pub timeago_tokens: phf::Map<&'static str, TaToken>,
/// Order in which to parse numeric date components. Formatted as
/// a string of date identifiers (Y, M, D).
/// True if the month has to be parsed before the day
///
/// Examples:
///
/// - 03.01.2020 => `"DMY"`
/// - Jan 3, 2020 => `"DY"`
pub date_order: &'static [DateCmp],
/// - 03.01.2020 => DMY => false
/// - 01/03/2020 => MDY => true
pub month_before_day: bool,
/// Tokens for parsing month names.
///
/// Format: Parsed token -> Month number (starting from 1)
@ -139,13 +138,6 @@ pub(crate) fn entry(lang: Language) -> Entry {
};
});
// Date order
let mut date_order = "&[".to_owned();
entry.date_order.chars().for_each(|c| {
write!(date_order, "DateCmp::{c}, ").unwrap();
});
date_order = date_order.trim_end_matches([' ', ',']).to_owned() + "]";
// Number tokens
let mut number_tokens = phf_codegen::Map::<&str>::new();
entry.number_tokens.iter().for_each(|(txt, mag)| {
@ -186,8 +178,8 @@ pub(crate) fn entry(lang: Language) -> Entry {
.to_string()
.replace('\n', "\n ");
write!(code_timeago_tokens, "{} => Entry {{\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n number_nd_tokens: {},\n album_types: {},\n chan_prefix: {:?},\n chan_suffix: {:?},\n }},\n ",
selector, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_number_nd_tokens, code_album_types, entry.chan_prefix, entry.chan_suffix).unwrap();
write!(code_timeago_tokens, "{} => Entry {{\n timeago_tokens: {},\n month_before_day: {:?},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n number_nd_tokens: {},\n album_types: {},\n chan_prefix: {:?},\n chan_suffix: {:?},\n }},\n ",
selector, code_ta_tokens, entry.month_before_day, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_number_nd_tokens, code_album_types, entry.chan_prefix, entry.chan_suffix).unwrap();
}
code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n";

View file

@ -32,6 +32,7 @@ enum Commands {
CollectVideoDurations,
CollectVideoDates,
CollectHistoryDates,
CollectMusicHistoryDates,
CollectChanPrefixes,
ParsePlaylistDates,
ParseHistoryDates,
@ -74,6 +75,9 @@ async fn main() {
Commands::CollectHistoryDates => {
collect_history_dates::collect_dates().await;
}
Commands::CollectMusicHistoryDates => {
collect_history_dates::collect_dates_music().await;
}
Commands::CollectChanPrefixes => {
collect_chan_prefixes::collect_chan_prefixes().await;
}

View file

@ -13,6 +13,13 @@ pub struct DictEntry {
/// Should the language be parsed by character instead of by word?
/// (e.g. Chinese/Japanese)
pub by_char: bool,
/// True if the month has to be parsed before the day
///
/// Examples:
///
/// - 03.01.2020 => DMY => false
/// - 01/03/2020 => MDY => true
pub month_before_day: bool,
/// Tokens for parsing timeago strings.
///
/// Format: Parsed token -> \[Quantity\] Identifier