feat: add history item dates, extend timeago parser

This commit is contained in:
ThetaDev 2025-01-03 19:15:28 +01:00
parent 65ada37214
commit 320a8c2c24
No known key found for this signature in database
GPG key ID: E319D3C5148D65B6
28 changed files with 6507 additions and 2160 deletions

View file

@ -0,0 +1,69 @@
use std::{collections::BTreeMap, fs::File, io::BufReader};
use path_macro::path;
use rustypipe::{
client::RustyPipe,
param::{Language, LANGUAGES},
};
use serde::{Deserialize, Serialize};
use crate::util::{self, DICT_DIR};
/// Collected history date samples, keyed by the language they were fetched in.
type CollectedDates = BTreeMap<Language, HistoryDates>;
/// Textual playback dates sampled from the history in a single language.
///
/// This is the per-language value stored in `history_date_samples.json`.
#[derive(Debug, Serialize, Deserialize)]
struct HistoryDates {
/// Playback date text of the history item sampled for "this week"
/// (stored in the dictionary under the `0Wl` token).
this_week: String,
/// Playback date text of the history item sampled for "last week"
/// (stored in the dictionary under the `1Wl` token).
last_week: String,
}
/// Collects sample playback date strings from the music history in every language.
///
/// For each language in `LANGUAGES` the music history is queried and the playback date text
/// of two history items is recorded: one expected to have been played this week and one
/// expected to have been played last week. The samples are written as pretty-printed JSON to
/// `history_date_samples.json` inside the dictionary directory.
///
/// The positions of the two sampled items depend on the contents of the history being
/// queried, so `THIS_WEEK_IDX` / `LAST_WEEK_IDX` have to be adapted before running.
///
/// # Panics
///
/// Panics if the client cannot be built, a history request fails, the history of a language
/// has too few items to contain both sample positions, a sampled item has no playback date
/// text, or the output file cannot be created or written.
pub async fn collect_dates() {
    // Positions of the sampled history items. The indexes have to be adapted before running.
    const THIS_WEEK_IDX: usize = 0;
    const LAST_WEEK_IDX: usize = 18;
    // Both positions must exist, so the guard is derived from the indexes instead of being a
    // separate magic number that can drift out of sync with them.
    let min_items = THIS_WEEK_IDX.max(LAST_WEEK_IDX) + 1;

    let json_path = path!(*DICT_DIR / "history_date_samples.json");
    // NOTE(review): the storage directory is a machine-specific absolute path; it has to be
    // adapted when this tool is run on another machine.
    let rp = RustyPipe::builder()
        .storage_dir("/home/thetadev/Documents/Programmieren/Rust/rustypipe")
        .build()
        .unwrap();

    let mut res: CollectedDates = BTreeMap::new();
    for lang in LANGUAGES {
        println!("{lang}");
        let history = rp.query().lang(lang).music_history().await.unwrap();

        let n_items = history.items.len();
        if n_items < min_items {
            panic!("{lang}: history has only {n_items} items, need at least {min_items}")
        }

        // Fetches the playback date text of the item at `idx`, naming the language and
        // position if the item carries no date text.
        let date_txt = |idx: usize| -> String {
            history.items[idx]
                .playback_date_txt
                .clone()
                .unwrap_or_else(|| panic!("{lang}: history item {idx} has no playback date"))
        };

        let d = HistoryDates {
            this_week: date_txt(THIS_WEEK_IDX),
            last_week: date_txt(LAST_WEEK_IDX),
        };
        res.insert(lang, d);
    }

    let file = File::create(json_path).unwrap();
    serde_json::to_writer_pretty(file, &res).unwrap();
}
/// Merges the collected history date samples into the dictionary.
///
/// Loads `history_date_samples.json` from the dictionary directory and, for every language
/// already present in the dictionary, stores the filtered "this week" sample under the `0Wl`
/// token and the filtered "last week" sample under the `1Wl` token in that language's
/// `timeago_nd_tokens`. The updated dictionary is then written back.
///
/// # Panics
///
/// Panics if the sample file cannot be opened or deserialized, or if a language of the
/// dictionary has no entry in the sample file.
pub fn write_samples_to_dict() {
    let samples: CollectedDates = {
        let sample_file = File::open(path!(*DICT_DIR / "history_date_samples.json")).unwrap();
        serde_json::from_reader(BufReader::new(sample_file)).unwrap()
    };

    let mut dict = util::read_dict();
    for (lang, entry) in dict.iter_mut() {
        let sample = &samples[lang];
        // Order is significant: should both samples filter down to the same key, the
        // "last week" token is inserted last and therefore wins.
        for (date_txt, token) in [(&sample.this_week, "0Wl"), (&sample.last_week, "1Wl")] {
            entry
                .timeago_nd_tokens
                .insert(util::filter_datestr(date_txt), token.to_owned());
        }
    }
    util::write_dict(dict);
}

View file

@ -10,7 +10,7 @@ use crate::{
};
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());
static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w*)$").unwrap());
match TU_PATTERN.captures(tu) {
Some(cap) => (
cap.get(1).unwrap().as_str().parse().unwrap_or(1),
@ -22,6 +22,8 @@ fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
"W" => Some(TimeUnit::Week),
"M" => Some(TimeUnit::Month),
"Y" => Some(TimeUnit::Year),
"Wl" => Some(TimeUnit::LastWeek),
"Wd" => Some(TimeUnit::LastWeekday),
"" => None,
_ => panic!("invalid time unit: {tu}"),
},

View file

@ -3,6 +3,7 @@
mod abtest;
mod collect_album_types;
mod collect_chan_prefixes;
mod collect_history_dates;
mod collect_large_numbers;
mod collect_playlist_dates;
mod collect_video_dates;
@ -30,8 +31,10 @@ enum Commands {
CollectAlbumTypes,
CollectVideoDurations,
CollectVideoDates,
CollectHistoryDates,
CollectChanPrefixes,
ParsePlaylistDates,
ParseHistoryDates,
ParseLargeNumbers,
ParseAlbumTypes,
ParseVideoDurations,
@ -68,10 +71,14 @@ async fn main() {
Commands::CollectVideoDates => {
collect_video_dates::collect_video_dates(cli.concurrency).await;
}
Commands::CollectHistoryDates => {
collect_history_dates::collect_dates().await;
}
Commands::CollectChanPrefixes => {
collect_chan_prefixes::collect_chan_prefixes().await;
}
Commands::ParsePlaylistDates => collect_playlist_dates::write_samples_to_dict(),
Commands::ParseHistoryDates => collect_history_dates::write_samples_to_dict(),
Commands::ParseLargeNumbers => collect_large_numbers::write_samples_to_dict(),
Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(),
Commands::ParseVideoDurations => collect_video_durations::parse_video_durations(),

View file

@ -88,6 +88,8 @@ pub enum TimeUnit {
Week,
Month,
Year,
LastWeek,
LastWeekday,
}
impl TimeUnit {
@ -100,6 +102,8 @@ impl TimeUnit {
TimeUnit::Week => "W",
TimeUnit::Month => "M",
TimeUnit::Year => "Y",
TimeUnit::LastWeek => "Wl",
TimeUnit::LastWeekday => "Wd",
}
}
}

View file

@ -77,7 +77,7 @@ pub fn filter_datestr(string: &str) -> String {
.to_lowercase()
.chars()
.filter_map(|c| {
if c == '\u{200b}' || c.is_ascii_digit() {
if matches!(c, '\u{200b}' | '.' | ',') || c.is_ascii_digit() {
None
} else if c == '-' {
Some(' ')