fix: add dictionary support for short timeago strings

This commit is contained in:
ThetaDev 2023-05-31 01:41:46 +02:00
parent cc2cadc309
commit 0cd018e37a
10 changed files with 6308 additions and 1694 deletions

View file

@ -0,0 +1,83 @@
use std::{
collections::{BTreeMap, HashSet},
fs::File,
};
use futures::{stream, StreamExt};
use path_macro::path;
use rustypipe::{
client::{RustyPipe, RustyPipeQuery},
param::{Language, LANGUAGES},
};
use crate::util::DICT_DIR;
pub async fn collect_video_dates(concurrency: usize) {
let json_path = path!(*DICT_DIR / "timeago_samples_short.json");
let rp = RustyPipe::builder()
.visitor_data("Cgtwel9tMkh2eHh0USiyzc6jBg%3D%3D")
.build();
let channels = [
"UCeY0bbntWzzVIaj2z3QigXg",
"UCcmpeVbSSQlZRvHfdC-CRwg",
"UC65afEgL62PGFWXY7n6CUbA",
"UCEOXxzW2vU0P-0THehuIIeg",
];
let mut lang_strings: BTreeMap<Language, Vec<String>> = BTreeMap::new();
for lang in LANGUAGES {
println!("{lang}");
let query = rp.query().lang(lang);
let strings = stream::iter(channels)
.map(|id| get_channel_datestrings(&query, id))
.buffered(concurrency)
.collect::<Vec<_>>()
.await
.into_iter()
.flatten()
.collect::<Vec<_>>();
lang_strings.insert(lang, strings);
}
let mut en_strings_uniq: HashSet<&str> = HashSet::new();
let mut uniq_ids: HashSet<usize> = HashSet::new();
lang_strings[&Language::En]
.iter()
.enumerate()
.for_each(|(n, s)| {
if en_strings_uniq.insert(s) {
uniq_ids.insert(n);
}
});
let strings_map = lang_strings
.iter()
.map(|(lang, strings)| {
(
lang,
strings
.iter()
.enumerate()
.filter(|(n, _)| uniq_ids.contains(n))
.map(|(_, s)| s)
.collect::<Vec<_>>(),
)
})
.collect::<BTreeMap<_, _>>();
let file = File::create(json_path).unwrap();
serde_json::to_writer_pretty(file, &strings_map).unwrap();
}
async fn get_channel_datestrings(rp: &RustyPipeQuery, id: &str) -> Vec<String> {
let channel = rp.channel_videos(id).await.unwrap();
channel
.content
.items
.into_iter()
.filter_map(|itm| itm.publish_date_txt)
.collect()
}

View file

@ -4,6 +4,7 @@ mod abtest;
mod collect_album_types;
mod collect_large_numbers;
mod collect_playlist_dates;
mod collect_video_dates;
mod collect_video_durations;
mod download_testfiles;
mod gen_dictionary;
@ -27,6 +28,7 @@ enum Commands {
CollectLargeNumbers,
CollectAlbumTypes,
CollectVideoDurations,
CollectVideoDates,
ParsePlaylistDates,
ParseLargeNumbers,
ParseAlbumTypes,
@ -60,6 +62,9 @@ async fn main() {
Commands::CollectVideoDurations => {
collect_video_durations::collect_video_durations(cli.concurrency).await;
}
Commands::CollectVideoDates => {
collect_video_dates::collect_video_dates(cli.concurrency).await;
}
Commands::ParsePlaylistDates => collect_playlist_dates::write_samples_to_dict(),
Commands::ParseLargeNumbers => collect_large_numbers::write_samples_to_dict(),
Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(),