fix: parsing history dates
This commit is contained in:
parent
32fda234e4
commit
af7dc10163
10 changed files with 2084 additions and 429 deletions
|
|
@ -5,26 +5,25 @@ use rustypipe::{
|
|||
client::RustyPipe,
|
||||
param::{Language, LANGUAGES},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::util::{self, DICT_DIR};
|
||||
|
||||
type CollectedDates = BTreeMap<Language, HistoryDates>;
|
||||
type CollectedDates = BTreeMap<Language, BTreeMap<String, String>>;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct HistoryDates {
|
||||
this_week: String,
|
||||
last_week: String,
|
||||
}
|
||||
const THIS_WEEK: &str = "this_week";
|
||||
const LAST_WEEK: &str = "last_week";
|
||||
|
||||
pub async fn collect_dates() {
|
||||
pub async fn collect_dates_music() {
|
||||
let json_path = path!(*DICT_DIR / "history_date_samples.json");
|
||||
let rp = RustyPipe::builder()
|
||||
.storage_dir("/home/thetadev/Documents/Programmieren/Rust/rustypipe")
|
||||
.storage_dir(path!(env!("CARGO_MANIFEST_DIR") / ".."))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut res: CollectedDates = BTreeMap::new();
|
||||
let mut res: CollectedDates = {
|
||||
let json_file = File::open(&json_path).unwrap();
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap()
|
||||
};
|
||||
|
||||
for lang in LANGUAGES {
|
||||
println!("{lang}");
|
||||
|
|
@ -34,14 +33,56 @@ pub async fn collect_dates() {
|
|||
}
|
||||
|
||||
// The indexes have to be adapted before running
|
||||
let d = HistoryDates {
|
||||
this_week: history.items[0].playback_date_txt.clone().unwrap(),
|
||||
last_week: history.items[18].playback_date_txt.clone().unwrap(),
|
||||
};
|
||||
res.insert(lang, d);
|
||||
let entry = res.entry(lang).or_default();
|
||||
entry.insert(
|
||||
THIS_WEEK.to_owned(),
|
||||
history.items[0].playback_date_txt.clone().unwrap(),
|
||||
);
|
||||
entry.insert(
|
||||
LAST_WEEK.to_owned(),
|
||||
history.items[18].playback_date_txt.clone().unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
let file = File::create(json_path).unwrap();
|
||||
let file = File::create(&json_path).unwrap();
|
||||
serde_json::to_writer_pretty(file, &res).unwrap();
|
||||
}
|
||||
|
||||
pub async fn collect_dates() {
|
||||
let json_path = path!(*DICT_DIR / "history_date_samples.json");
|
||||
let rp = RustyPipe::builder()
|
||||
.storage_dir(path!(env!("CARGO_MANIFEST_DIR") / ".."))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut res: CollectedDates = {
|
||||
let json_file = File::open(&json_path).unwrap();
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap()
|
||||
};
|
||||
|
||||
for lang in LANGUAGES {
|
||||
println!("{lang}");
|
||||
let history = rp.query().lang(lang).history().await.unwrap();
|
||||
if history.items.len() < 3 {
|
||||
panic!("{lang} empty history")
|
||||
}
|
||||
|
||||
let entry = res.entry(lang).or_default();
|
||||
entry.insert(
|
||||
"tuesday".to_owned(),
|
||||
history.items[0].playback_date_txt.clone().unwrap(),
|
||||
);
|
||||
entry.insert(
|
||||
"0000-01-06".to_owned(),
|
||||
history.items[1].playback_date_txt.clone().unwrap(),
|
||||
);
|
||||
entry.insert(
|
||||
"2024-12-28".to_owned(),
|
||||
history.items[15].playback_date_txt.clone().unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
let file = File::create(&json_path).unwrap();
|
||||
serde_json::to_writer_pretty(file, &res).unwrap();
|
||||
}
|
||||
|
||||
|
|
@ -59,10 +100,10 @@ pub fn write_samples_to_dict() {
|
|||
let cd = &collected_dates[&lang];
|
||||
dict_entry
|
||||
.timeago_nd_tokens
|
||||
.insert(util::filter_datestr(&cd.this_week), "0Wl".to_owned());
|
||||
.insert(util::filter_datestr(&cd[THIS_WEEK]), "0Wl".to_owned());
|
||||
dict_entry
|
||||
.timeago_nd_tokens
|
||||
.insert(util::filter_datestr(&cd.last_week), "1Wl".to_owned());
|
||||
.insert(util::filter_datestr(&cd[LAST_WEEK]), "1Wl".to_owned());
|
||||
}
|
||||
|
||||
util::write_dict(dict);
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ pub fn generate_dictionary() {
|
|||
use crate::{
|
||||
model::AlbumType,
|
||||
param::Language,
|
||||
util::timeago::{DateCmp, TaToken, TimeUnit},
|
||||
util::timeago::{TaToken, TimeUnit},
|
||||
};
|
||||
|
||||
/// Dictionary entry containing language-specific parsing information
|
||||
|
|
@ -57,14 +57,13 @@ pub(crate) struct Entry {
|
|||
/// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
|
||||
/// `h`(our), `m`(inute), `s`(econd)
|
||||
pub timeago_tokens: phf::Map<&'static str, TaToken>,
|
||||
/// Order in which to parse numeric date components. Formatted as
|
||||
/// a string of date identifiers (Y, M, D).
|
||||
/// True if the month has to be parsed before the day
|
||||
///
|
||||
/// Examples:
|
||||
///
|
||||
/// - 03.01.2020 => `"DMY"`
|
||||
/// - Jan 3, 2020 => `"DY"`
|
||||
pub date_order: &'static [DateCmp],
|
||||
/// - 03.01.2020 => DMY => false
|
||||
/// - 01/03/2020 => MDY => true
|
||||
pub month_before_day: bool,
|
||||
/// Tokens for parsing month names.
|
||||
///
|
||||
/// Format: Parsed token -> Month number (starting from 1)
|
||||
|
|
@ -139,13 +138,6 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
};
|
||||
});
|
||||
|
||||
// Date order
|
||||
let mut date_order = "&[".to_owned();
|
||||
entry.date_order.chars().for_each(|c| {
|
||||
write!(date_order, "DateCmp::{c}, ").unwrap();
|
||||
});
|
||||
date_order = date_order.trim_end_matches([' ', ',']).to_owned() + "]";
|
||||
|
||||
// Number tokens
|
||||
let mut number_tokens = phf_codegen::Map::<&str>::new();
|
||||
entry.number_tokens.iter().for_each(|(txt, mag)| {
|
||||
|
|
@ -186,8 +178,8 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
.to_string()
|
||||
.replace('\n', "\n ");
|
||||
|
||||
write!(code_timeago_tokens, "{} => Entry {{\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n number_nd_tokens: {},\n album_types: {},\n chan_prefix: {:?},\n chan_suffix: {:?},\n }},\n ",
|
||||
selector, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_number_nd_tokens, code_album_types, entry.chan_prefix, entry.chan_suffix).unwrap();
|
||||
write!(code_timeago_tokens, "{} => Entry {{\n timeago_tokens: {},\n month_before_day: {:?},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n number_nd_tokens: {},\n album_types: {},\n chan_prefix: {:?},\n chan_suffix: {:?},\n }},\n ",
|
||||
selector, code_ta_tokens, entry.month_before_day, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_number_nd_tokens, code_album_types, entry.chan_prefix, entry.chan_suffix).unwrap();
|
||||
}
|
||||
|
||||
code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n";
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ enum Commands {
|
|||
CollectVideoDurations,
|
||||
CollectVideoDates,
|
||||
CollectHistoryDates,
|
||||
CollectMusicHistoryDates,
|
||||
CollectChanPrefixes,
|
||||
ParsePlaylistDates,
|
||||
ParseHistoryDates,
|
||||
|
|
@ -74,6 +75,9 @@ async fn main() {
|
|||
Commands::CollectHistoryDates => {
|
||||
collect_history_dates::collect_dates().await;
|
||||
}
|
||||
Commands::CollectMusicHistoryDates => {
|
||||
collect_history_dates::collect_dates_music().await;
|
||||
}
|
||||
Commands::CollectChanPrefixes => {
|
||||
collect_chan_prefixes::collect_chan_prefixes().await;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,13 @@ pub struct DictEntry {
|
|||
/// Should the language be parsed by character instead of by word?
|
||||
/// (e.g. Chinese/Japanese)
|
||||
pub by_char: bool,
|
||||
/// True if the month has to be parsed before the day
|
||||
///
|
||||
/// Examples:
|
||||
///
|
||||
/// - 03.01.2020 => DMY => false
|
||||
/// - 01/03/2020 => MDY => true
|
||||
pub month_before_day: bool,
|
||||
/// Tokens for parsing timeago strings.
|
||||
///
|
||||
/// Format: Parsed token -> \[Quantity\] Identifier
|
||||
|
|
|
|||
Reference in a new issue