feat: add history item dates, extend timeago parser
This commit is contained in:
parent
65ada37214
commit
320a8c2c24
28 changed files with 6507 additions and 2160 deletions
69
codegen/src/collect_history_dates.rs
Normal file
69
codegen/src/collect_history_dates.rs
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
use std::{collections::BTreeMap, fs::File, io::BufReader};
|
||||
|
||||
use path_macro::path;
|
||||
use rustypipe::{
|
||||
client::RustyPipe,
|
||||
param::{Language, LANGUAGES},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::util::{self, DICT_DIR};
|
||||
|
||||
type CollectedDates = BTreeMap<Language, HistoryDates>;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct HistoryDates {
|
||||
this_week: String,
|
||||
last_week: String,
|
||||
}
|
||||
|
||||
pub async fn collect_dates() {
|
||||
let json_path = path!(*DICT_DIR / "history_date_samples.json");
|
||||
let rp = RustyPipe::builder()
|
||||
.storage_dir("/home/thetadev/Documents/Programmieren/Rust/rustypipe")
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut res: CollectedDates = BTreeMap::new();
|
||||
|
||||
for lang in LANGUAGES {
|
||||
println!("{lang}");
|
||||
let history = rp.query().lang(lang).music_history().await.unwrap();
|
||||
if history.items.len() < 3 {
|
||||
panic!("{lang} empty history")
|
||||
}
|
||||
|
||||
// The indexes have to be adapted before running
|
||||
let d = HistoryDates {
|
||||
this_week: history.items[0].playback_date_txt.clone().unwrap(),
|
||||
last_week: history.items[18].playback_date_txt.clone().unwrap(),
|
||||
};
|
||||
res.insert(lang, d);
|
||||
}
|
||||
|
||||
let file = File::create(json_path).unwrap();
|
||||
serde_json::to_writer_pretty(file, &res).unwrap();
|
||||
}
|
||||
|
||||
pub fn write_samples_to_dict() {
|
||||
let json_path = path!(*DICT_DIR / "history_date_samples.json");
|
||||
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
let collected_dates: CollectedDates =
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
||||
let mut dict = util::read_dict();
|
||||
let langs = dict.keys().copied().collect::<Vec<_>>();
|
||||
|
||||
for lang in langs {
|
||||
let dict_entry = dict.entry(lang).or_default();
|
||||
let cd = &collected_dates[&lang];
|
||||
dict_entry
|
||||
.timeago_nd_tokens
|
||||
.insert(util::filter_datestr(&cd.this_week), "0Wl".to_owned());
|
||||
dict_entry
|
||||
.timeago_nd_tokens
|
||||
.insert(util::filter_datestr(&cd.last_week), "1Wl".to_owned());
|
||||
}
|
||||
|
||||
util::write_dict(dict);
|
||||
}
|
||||
|
|
@ -10,7 +10,7 @@ use crate::{
|
|||
};
|
||||
|
||||
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
|
||||
static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());
|
||||
static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w*)$").unwrap());
|
||||
match TU_PATTERN.captures(tu) {
|
||||
Some(cap) => (
|
||||
cap.get(1).unwrap().as_str().parse().unwrap_or(1),
|
||||
|
|
@ -22,6 +22,8 @@ fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
|
|||
"W" => Some(TimeUnit::Week),
|
||||
"M" => Some(TimeUnit::Month),
|
||||
"Y" => Some(TimeUnit::Year),
|
||||
"Wl" => Some(TimeUnit::LastWeek),
|
||||
"Wd" => Some(TimeUnit::LastWeekday),
|
||||
"" => None,
|
||||
_ => panic!("invalid time unit: {tu}"),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
mod abtest;
|
||||
mod collect_album_types;
|
||||
mod collect_chan_prefixes;
|
||||
mod collect_history_dates;
|
||||
mod collect_large_numbers;
|
||||
mod collect_playlist_dates;
|
||||
mod collect_video_dates;
|
||||
|
|
@ -30,8 +31,10 @@ enum Commands {
|
|||
CollectAlbumTypes,
|
||||
CollectVideoDurations,
|
||||
CollectVideoDates,
|
||||
CollectHistoryDates,
|
||||
CollectChanPrefixes,
|
||||
ParsePlaylistDates,
|
||||
ParseHistoryDates,
|
||||
ParseLargeNumbers,
|
||||
ParseAlbumTypes,
|
||||
ParseVideoDurations,
|
||||
|
|
@ -68,10 +71,14 @@ async fn main() {
|
|||
Commands::CollectVideoDates => {
|
||||
collect_video_dates::collect_video_dates(cli.concurrency).await;
|
||||
}
|
||||
Commands::CollectHistoryDates => {
|
||||
collect_history_dates::collect_dates().await;
|
||||
}
|
||||
Commands::CollectChanPrefixes => {
|
||||
collect_chan_prefixes::collect_chan_prefixes().await;
|
||||
}
|
||||
Commands::ParsePlaylistDates => collect_playlist_dates::write_samples_to_dict(),
|
||||
Commands::ParseHistoryDates => collect_history_dates::write_samples_to_dict(),
|
||||
Commands::ParseLargeNumbers => collect_large_numbers::write_samples_to_dict(),
|
||||
Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(),
|
||||
Commands::ParseVideoDurations => collect_video_durations::parse_video_durations(),
|
||||
|
|
|
|||
|
|
@ -88,6 +88,8 @@ pub enum TimeUnit {
|
|||
Week,
|
||||
Month,
|
||||
Year,
|
||||
LastWeek,
|
||||
LastWeekday,
|
||||
}
|
||||
|
||||
impl TimeUnit {
|
||||
|
|
@ -100,6 +102,8 @@ impl TimeUnit {
|
|||
TimeUnit::Week => "W",
|
||||
TimeUnit::Month => "M",
|
||||
TimeUnit::Year => "Y",
|
||||
TimeUnit::LastWeek => "Wl",
|
||||
TimeUnit::LastWeekday => "Wd",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ pub fn filter_datestr(string: &str) -> String {
|
|||
.to_lowercase()
|
||||
.chars()
|
||||
.filter_map(|c| {
|
||||
if c == '\u{200b}' || c.is_ascii_digit() {
|
||||
if matches!(c, '\u{200b}' | '.' | ',') || c.is_ascii_digit() {
|
||||
None
|
||||
} else if c == '-' {
|
||||
Some(' ')
|
||||
|
|
|
|||
Reference in a new issue