feat: multilanguage album type parsing
- new album types: Audiobook, Show
This commit is contained in:
parent
abfd630a04
commit
45e2d3c7c7
15 changed files with 2354 additions and 32 deletions
121
codegen/src/collect_album_types.rs
Normal file
121
codegen/src/collect_album_types.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
|
||||
|
||||
use futures::stream::{self, StreamExt};
|
||||
use rustypipe::{
|
||||
client::{ClientType, RustyPipe, RustyPipeQuery, YTContext},
|
||||
model::AlbumType,
|
||||
param::{locale::LANGUAGES, Language},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::util::{self, TextRuns};
|
||||
|
||||
pub async fn collect_album_types(project_root: &Path, concurrency: usize) {
|
||||
let mut json_path = project_root.to_path_buf();
|
||||
json_path.push("testfiles/dict/album_type_samples.json");
|
||||
|
||||
let album_types = [
|
||||
(AlbumType::Album, "MPREb_nlBWQROfvjo"),
|
||||
(AlbumType::Single, "MPREb_bHfHGoy7vuv"),
|
||||
(AlbumType::Ep, "MPREb_u1I69lSAe5v"),
|
||||
(AlbumType::Audiobook, "MPREb_gaoNzsQHedo"),
|
||||
(AlbumType::Show, "MPREb_cwzk8EUwypZ"),
|
||||
];
|
||||
|
||||
let rp = RustyPipe::new();
|
||||
|
||||
let collected_album_types = stream::iter(LANGUAGES)
|
||||
.map(|lang| {
|
||||
let rp = rp.clone();
|
||||
async move {
|
||||
let query = rp.query().lang(lang);
|
||||
let mut data: BTreeMap<AlbumType, String> = BTreeMap::new();
|
||||
|
||||
for (album_type, id) in album_types {
|
||||
let atype_txt = get_album_type(&query, id).await;
|
||||
println!("collected {}-{:?} ({})", lang, album_type, &atype_txt);
|
||||
data.insert(album_type, atype_txt);
|
||||
}
|
||||
|
||||
(lang, data)
|
||||
}
|
||||
})
|
||||
.buffer_unordered(concurrency)
|
||||
.collect::<BTreeMap<_, _>>()
|
||||
.await;
|
||||
|
||||
let file = File::create(json_path).unwrap();
|
||||
serde_json::to_writer_pretty(file, &collected_album_types).unwrap();
|
||||
}
|
||||
|
||||
pub fn write_samples_to_dict(project_root: &Path) {
|
||||
let mut json_path = project_root.to_path_buf();
|
||||
json_path.push("testfiles/dict/album_type_samples.json");
|
||||
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
let collected: BTreeMap<Language, BTreeMap<AlbumType, String>> =
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
||||
let mut dict = util::read_dict(project_root);
|
||||
let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();
|
||||
|
||||
for lang in langs {
|
||||
let dict_entry = dict.entry(lang).or_default();
|
||||
|
||||
let mut e_langs = dict_entry.equivalent.clone();
|
||||
e_langs.push(lang);
|
||||
|
||||
collected.get(&lang).unwrap().iter().for_each(|(t, v)| {
|
||||
dict_entry.album_types.insert(v.to_lowercase(), *t);
|
||||
});
|
||||
}
|
||||
|
||||
util::write_dict(project_root, &dict);
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AlbumData {
|
||||
header: Header,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct Header {
|
||||
music_detail_header_renderer: HeaderRenderer,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct HeaderRenderer {
|
||||
subtitle: TextRuns,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct QBrowse<'a> {
|
||||
context: YTContext<'a>,
|
||||
browse_id: &'a str,
|
||||
}
|
||||
|
||||
async fn get_album_type(query: &RustyPipeQuery, id: &str) -> String {
|
||||
let context = query
|
||||
.get_context(ClientType::DesktopMusic, true, None)
|
||||
.await;
|
||||
let body = QBrowse {
|
||||
context,
|
||||
browse_id: id,
|
||||
};
|
||||
let response_txt = query
|
||||
.raw(ClientType::DesktopMusic, "browse", &body)
|
||||
.await
|
||||
.unwrap();
|
||||
let album = serde_json::from_str::<AlbumData>(&response_txt).unwrap();
|
||||
|
||||
album
|
||||
.header
|
||||
.music_detail_header_renderer
|
||||
.subtitle
|
||||
.runs
|
||||
.into_iter()
|
||||
.next()
|
||||
.unwrap()
|
||||
.text
|
||||
}
|
||||
|
|
@ -375,13 +375,11 @@ async fn get_channel(channel_id: &str, lang: Language) -> Result<ChannelData> {
|
|||
.iter()
|
||||
.map(|itm| {
|
||||
(
|
||||
util::parse_numeric(
|
||||
&itm.grid_video_renderer.view_count_text.simple_text,
|
||||
)
|
||||
.unwrap(),
|
||||
util::parse_numeric(&itm.grid_video_renderer.view_count_text.text)
|
||||
.unwrap(),
|
||||
itm.grid_video_renderer
|
||||
.short_view_count_text
|
||||
.simple_text
|
||||
.text
|
||||
.to_owned(),
|
||||
)
|
||||
})
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ pub fn generate_dictionary(project_root: &Path) {
|
|||
let code_head = r#"// This file is automatically generated. DO NOT EDIT.
|
||||
// See codegen/gen_dictionary.rs for the generation code.
|
||||
use crate::{
|
||||
model::AlbumType,
|
||||
param::Language,
|
||||
timeago::{DateCmp, TaToken, TimeUnit},
|
||||
};
|
||||
|
|
@ -75,6 +76,10 @@ pub(crate) struct Entry {
|
|||
///
|
||||
/// Format: Parsed token -> decimal power
|
||||
pub number_tokens: phf::Map<&'static str, u8>,
|
||||
/// Names of album types (Album, Single, ...)
|
||||
///
|
||||
/// Format: Parsed text -> Album type
|
||||
pub album_types: phf::Map<&'static str, AlbumType>,
|
||||
}
|
||||
"#;
|
||||
|
||||
|
|
@ -136,13 +141,20 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
number_tokens.entry(txt, &mag.to_string());
|
||||
});
|
||||
|
||||
// Album types
|
||||
let mut album_types = phf_codegen::Map::<&str>::new();
|
||||
entry.album_types.iter().for_each(|(txt, album_type)| {
|
||||
album_types.entry(txt, &format!("AlbumType::{:?}", album_type));
|
||||
});
|
||||
|
||||
let code_ta_tokens = &ta_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_ta_nd_tokens = &ta_nd_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_months = &months.build().to_string().replace('\n', "\n ");
|
||||
let code_number_tokens = &number_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_album_types = &album_types.build().to_string().replace('\n', "\n ");
|
||||
|
||||
let _ = write!(code_timeago_tokens, "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n }},\n ",
|
||||
selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens);
|
||||
let _ = write!(code_timeago_tokens, "{} => Entry {{\n by_char: {:?},\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n album_types: {},\n }},\n ",
|
||||
selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_album_types);
|
||||
});
|
||||
|
||||
code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n";
|
||||
|
|
|
|||
|
|
@ -359,7 +359,7 @@ fn map_language_section(section: &CompactLinkRendererWrap) -> BTreeMap<String, S
|
|||
.select_language_command
|
||||
.hl
|
||||
.to_owned(),
|
||||
i.compact_link_renderer.title.simple_text.to_owned(),
|
||||
i.compact_link_renderer.title.text.to_owned(),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
mod collect_album_types;
|
||||
mod collect_large_numbers;
|
||||
mod collect_playlist_dates;
|
||||
mod download_testfiles;
|
||||
|
|
@ -23,8 +24,10 @@ struct Cli {
|
|||
enum Commands {
|
||||
CollectPlaylistDates,
|
||||
CollectLargeNumbers,
|
||||
CollectAlbumTypes,
|
||||
ParsePlaylistDates,
|
||||
ParseLargeNumbers,
|
||||
ParseAlbumTypes,
|
||||
GenLocales,
|
||||
GenDict,
|
||||
DownloadTestfiles,
|
||||
|
|
@ -42,12 +45,16 @@ async fn main() {
|
|||
Commands::CollectLargeNumbers => {
|
||||
collect_large_numbers::collect_large_numbers(&cli.project_root, cli.concurrency).await;
|
||||
}
|
||||
Commands::CollectAlbumTypes => {
|
||||
collect_album_types::collect_album_types(&cli.project_root, cli.concurrency).await;
|
||||
}
|
||||
Commands::ParsePlaylistDates => {
|
||||
collect_playlist_dates::write_samples_to_dict(&cli.project_root)
|
||||
}
|
||||
Commands::ParseLargeNumbers => {
|
||||
collect_large_numbers::write_samples_to_dict(&cli.project_root)
|
||||
}
|
||||
Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(&cli.project_root),
|
||||
Commands::GenLocales => {
|
||||
gen_locales::generate_locales(&cli.project_root).await;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path, str::FromStr};
|
||||
|
||||
use rustypipe::param::Language;
|
||||
use rustypipe::{model::AlbumType, param::Language};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
const DICT_PATH: &str = "testfiles/dict/dictionary.json";
|
||||
|
|
@ -44,12 +44,21 @@ pub struct DictEntry {
|
|||
///
|
||||
/// Format: Parsed token -> decimal power
|
||||
pub number_tokens: BTreeMap<String, u8>,
|
||||
/// Names of album types (Album, Single, ...)
|
||||
///
|
||||
/// Format: Parsed text -> Album type
|
||||
pub album_types: BTreeMap<String, AlbumType>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct TextRuns {
|
||||
pub runs: Vec<Text>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Text {
|
||||
pub simple_text: String,
|
||||
#[serde(alias = "simpleText")]
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
pub fn read_dict(project_root: &Path) -> Dictionary {
|
||||
|
|
|
|||
Reference in a new issue