feat(codegen): collected video duration samples

2023-05-06 21:12:49 +02:00 · 2023-05-06 21:12:49 +02:00 · 800073df48
commit 800073df48
parent 19781eab36
14 changed files with 7325 additions and 1449 deletions
--- a/codegen/Cargo.toml
+++ b/codegen/Cargo.toml
@ -19,5 +19,7 @@ phf_codegen = "0.11.1"
 once_cell = "1.12.0"
 regex = "1.7.1"
 indicatif = "0.17.0"
-num_enum = "0.5.7"
+num_enum = "0.6.1"
 path_macro = "1.0.0"
+intl_pluralrules = "7.0.2"
+unic-langid = "0.9.1"
--- a/codegen/src/collect_album_types.rs
+++ b/codegen/src/collect_album_types.rs
@ -1,4 +1,4 @@
-use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
+use std::{collections::BTreeMap, fs::File, io::BufReader};

 use futures::stream::{self, StreamExt};
 use path_macro::path;
@ -9,10 +9,13 @@ use rustypipe::{
 };
 use serde::Deserialize;

-use crate::util::{self, QBrowse, TextRuns};
+use crate::{
+    model::{QBrowse, TextRuns},
+    util::{self, DICT_DIR},
+};

-pub async fn collect_album_types(project_root: &Path, concurrency: usize) {
-    let json_path = path!(project_root / "testfiles" / "dict" / "album_type_samples.json");
+pub async fn collect_album_types(concurrency: usize) {
+    let json_path = path!(*DICT_DIR / "album_type_samples.json");

    let album_types = [
        (AlbumType::Album, "MPREb_nlBWQROfvjo"),
@ -48,13 +51,13 @@ pub async fn collect_album_types(project_root: &Path, concurrency: usize) {
    serde_json::to_writer_pretty(file, &collected_album_types).unwrap();
 }

-pub fn write_samples_to_dict(project_root: &Path) {
-    let json_path = path!(project_root / "testfiles" / "dict" / "album_type_samples.json");
+pub fn write_samples_to_dict() {
+    let json_path = path!(*DICT_DIR / "album_type_samples.json");

    let json_file = File::open(json_path).unwrap();
    let collected: BTreeMap<Language, BTreeMap<AlbumType, String>> =
        serde_json::from_reader(BufReader::new(json_file)).unwrap();
-    let mut dict = util::read_dict(project_root);
+    let mut dict = util::read_dict();
    let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();

    for lang in langs {
@ -72,7 +75,7 @@ pub fn write_samples_to_dict(project_root: &Path) {
        });
    }

-    util::write_dict(project_root, dict);
+    util::write_dict(dict);
 }

 #[derive(Debug, Deserialize)]
--- a/codegen/src/collect_large_numbers.rs
+++ b/codegen/src/collect_large_numbers.rs
@ -3,7 +3,6 @@ use std::{
    collections::{BTreeMap, HashMap, HashSet},
    fs::File,
    io::BufReader,
-    path::Path,
 };

 use anyhow::{Context, Result};
@ -14,9 +13,13 @@ use regex::Regex;
 use rustypipe::client::{ClientType, RustyPipe, RustyPipeQuery};
 use rustypipe::param::{locale::LANGUAGES, Language};
 use serde::Deserialize;
-use serde_with::{serde_as, DefaultOnError, VecSkipError};

-use crate::util::{self, QBrowse, QCont, Text, TextRuns};
+use crate::model::{Channel, ContinuationResponse};
+use crate::util::DICT_DIR;
+use crate::{
+    model::{QBrowse, QCont, TextRuns},
+    util,
+};

 type CollectedNumbers = BTreeMap<Language, BTreeMap<String, u64>>;

@ -34,8 +37,8 @@ type CollectedNumbers = BTreeMap<Language, BTreeMap<String, u64>>;
 /// We extract these instead of subscriber counts because the YouTube API
 /// outputs view counts both in approximated and exact format, so we can use
 /// the exact counts to figure out the tokens.
-pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
-    let json_path = path!(project_root / "testfiles" / "dict" / "large_number_samples_all.json");
+pub async fn collect_large_numbers(concurrency: usize) {
+    let json_path = path!(*DICT_DIR / "large_number_samples_all.json");
    let rp = RustyPipe::new();

    let channels = [
@ -137,13 +140,13 @@ pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {

 /// Attempt to parse the numbers collected by `collect-large-numbers`
 /// and write the results to `dictionary.json`.
-pub fn write_samples_to_dict(project_root: &Path) {
-    let json_path = path!(project_root / "testfiles" / "dict" / "large_number_samples.json");
+pub fn write_samples_to_dict() {
+    let json_path = path!(*DICT_DIR / "large_number_samples.json");

    let json_file = File::open(json_path).unwrap();
    let collected_nums: CollectedNumbers =
        serde_json::from_reader(BufReader::new(json_file)).unwrap();
-    let mut dict = util::read_dict(project_root);
+    let mut dict = util::read_dict();
    let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();

    static POINT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\d(\.|,)\d{1,3}(?:\D|$)").unwrap());
@ -292,164 +295,13 @@ pub fn write_samples_to_dict(project_root: &Path) {
        }
    }

-    util::write_dict(project_root, dict);
+    util::write_dict(dict);
 }

 fn get_mag(n: u64) -> u8 {
    (n as f64).log10().floor() as u8
 }

-/*
-YouTube channel videos response
-*/
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct Channel {
-    contents: Contents,
-    header: ChannelHeader,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct ChannelHeader {
-    c4_tabbed_header_renderer: HeaderRenderer,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct HeaderRenderer {
-    subscriber_count_text: Text,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct Contents {
-    two_column_browse_results_renderer: TabsRenderer,
-}
-
-#[serde_as]
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct TabsRenderer {
-    #[serde_as(as = "VecSkipError<_>")]
-    tabs: Vec<TabRendererWrap>,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct TabRendererWrap {
-    tab_renderer: TabRenderer,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct TabRenderer {
-    content: RichGridRendererWrap,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct RichGridRendererWrap {
-    rich_grid_renderer: RichGridRenderer,
-}
-
-#[serde_as]
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct RichGridRenderer {
-    #[serde_as(as = "VecSkipError<_>")]
-    contents: Vec<RichItemRendererWrap>,
-    #[serde(default)]
-    #[serde_as(as = "DefaultOnError")]
-    header: Option<RichGridHeader>,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct RichItemRendererWrap {
-    rich_item_renderer: RichItemRenderer,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct RichItemRenderer {
-    content: VideoRendererWrap,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct VideoRendererWrap {
-    video_renderer: VideoRenderer,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct VideoRenderer {
-    /// `24,194 views`
-    view_count_text: Text,
-    /// `19K views`
-    short_view_count_text: Text,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct RichGridHeader {
-    feed_filter_chip_bar_renderer: ChipBar,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct ChipBar {
-    contents: Vec<Chip>,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct Chip {
-    chip_cloud_chip_renderer: ChipRenderer,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct ChipRenderer {
-    navigation_endpoint: NavigationEndpoint,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct NavigationEndpoint {
-    continuation_command: ContinuationCommand,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct ContinuationCommand {
-    token: String,
-}
-
-#[serde_as]
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct ContinuationResponse {
-    // #[serde_as(as = "VecSkipError<_>")]
-    on_response_received_actions: Vec<ContinuationAction>,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct ContinuationAction {
-    reload_continuation_items_command: ContinuationItemsWrap,
-}
-
-#[serde_as]
-#[derive(Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct ContinuationItemsWrap {
-    #[serde_as(as = "VecSkipError<_>")]
-    continuation_items: Vec<RichItemRendererWrap>,
-}
-
 /*
 YouTube Music channel data
 */
--- a/codegen/src/collect_playlist_dates.rs
+++ b/codegen/src/collect_playlist_dates.rs
@ -3,7 +3,6 @@ use std::{
    fs::File,
    hash::Hash,
    io::BufReader,
-    path::Path,
 };

 use futures::{stream, StreamExt};
@ -11,11 +10,10 @@ use path_macro::path;
 use rustypipe::{
    client::RustyPipe,
    param::{locale::LANGUAGES, Language},
-    timeago::{self, TimeAgo},
 };
 use serde::{Deserialize, Serialize};

-use crate::util;
+use crate::util::{self, DICT_DIR};

 type CollectedDates = BTreeMap<Language, BTreeMap<DateCase, String>>;

@ -38,8 +36,6 @@ enum DateCase {
    Dec,
 }

-const N_AGO: u8 = 5;
-
 /// Collect 'Playlist updated' dates in every supported language
 /// and write them to `testfiles/dict/playlist_samples.json`.
 ///
@ -64,8 +60,8 @@ const N_AGO: u8 = 5;
 ///
 /// Because the relative dates change with time, the first three playlists
 /// have to checked and eventually changed before running the program.
-pub async fn collect_dates(project_root: &Path, concurrency: usize) {
-    let json_path = path!(project_root / "testfiles" / "dict" / "playlist_samples.json");
+pub async fn collect_dates(concurrency: usize) {
+    let json_path = path!(*DICT_DIR / "playlist_samples.json");

    // These are the sample playlists
    let cases = [
@ -115,13 +111,13 @@ pub async fn collect_dates(project_root: &Path, concurrency: usize) {
 ///
 /// The ND (no digit) tokens (today, tomorrow) of some languages cannot be
 /// parsed automatically and require manual work.
-pub fn write_samples_to_dict(project_root: &Path) {
-    let json_path = path!(project_root / "testfiles" / "dict" / "playlist_samples.json");
+pub fn write_samples_to_dict() {
+    let json_path = path!(*DICT_DIR / "playlist_samples.json");

    let json_file = File::open(json_path).unwrap();
    let collected_dates: CollectedDates =
        serde_json::from_reader(BufReader::new(json_file)).unwrap();
-    let mut dict = util::read_dict(project_root);
+    let mut dict = util::read_dict();
    let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();

    let months = [
@ -200,20 +196,6 @@ pub fn write_samples_to_dict(project_root: &Path) {
                parse(datestr_table.get(&DateCase::Jan).unwrap(), 0);
            }

-            // n days ago
-            {
-                let datestr = datestr_table.get(&DateCase::Ago).unwrap();
-                let tago = timeago::parse_timeago(lang, datestr);
-                assert_eq!(
-                    tago,
-                    Some(TimeAgo {
-                        n: N_AGO,
-                        unit: timeago::TimeUnit::Day
-                    }),
-                    "lang: {lang}, txt: {datestr}"
-                );
-            }
-
            // Absolute dates (Jan 3, 2020)
            months.iter().enumerate().for_each(|(n, m)| {
                let datestr = datestr_table.get(m).unwrap();
@ -291,5 +273,5 @@ pub fn write_samples_to_dict(project_root: &Path) {
        dict_entry.date_order = num_order;
    }

-    util::write_dict(project_root, dict);
+    util::write_dict(dict);
 }
--- a/codegen/src/collect_video_durations.rs
+++ b/codegen/src/collect_video_durations.rs
@ -0,0 +1,176 @@
+use std::{collections::BTreeMap, fs::File};
+
+use anyhow::Result;
+use futures::{stream, StreamExt};
+use path_macro::path;
+use rustypipe::{
+    client::{ClientType, RustyPipe, RustyPipeQuery},
+    param::{locale::LANGUAGES, Language},
+};
+
+use crate::{
+    model::{Channel, QBrowse},
+    util::{self, DICT_DIR},
+};
+
+type CollectedDurations = BTreeMap<Language, BTreeMap<String, u32>>;
+
+/// Collect the video duration texts in every supported language
+/// and write them to `testfiles/dict/video_duration_samples.json`.
+///
+/// The length of YouTube short videos is only available in textual form, so
+/// parsing it needs text samples in every language. Regular channel videos
+/// carry both a textual duration and an easy-to-parse "mm:ss" duration, so
+/// they serve as the sample source. `concurrency` caps how many languages are
+/// fetched in parallel. Panics if a request or writing the JSON file fails.
+pub async fn collect_video_durations(concurrency: usize) {
+    let json_path = path!(*DICT_DIR / "video_duration_samples.json");
+    let rp = RustyPipe::new();
+
+    let channels = [
+        "UCq-Fj5jknLsUf-MWSy4_brA",
+        "UCMcS5ITpSohfr8Ppzlo4vKw",
+        "UCXuqSBlHAE6Xw-yeJA0Tunw",
+    ];
+
+    let durations: CollectedDurations = stream::iter(LANGUAGES)
+        .map(|lang| {
+            let rp = rp.query().lang(lang); // per-language query; shadows the shared client
+            async move {
+                let mut map = BTreeMap::new(); // duration text -> duration, shared by all channels
+
+                for (n, ch_id) in channels.iter().enumerate() {
+                    get_channel_vlengths(&rp, ch_id, &mut map).await.unwrap();
+                    println!("collected {lang}-{n}"); // progress output: language and channel index
+                }
+
+                // Since we are only parsing shorts durations, we do not need durations >= 1h
+                let map = map.into_iter().filter(|(_, v)| v < &3600).collect();
+                (lang, map)
+            }
+        })
+        .buffer_unordered(concurrency) // at most `concurrency` language tasks run at once
+        .collect()
+        .await;
+
+    let file = File::create(json_path).unwrap();
+    serde_json::to_writer_pretty(file, &durations).unwrap();
+}
+
+async fn get_channel_vlengths( // fetch one channel page and add its video durations to `map`
+    query: &RustyPipeQuery,
+    channel_id: &str,
+    map: &mut BTreeMap<String, u32>,
+) -> Result<()> {
+    let resp = query
+        .raw(
+            ClientType::Desktop,
+            "browse",
+            &QBrowse {
+                context: query.get_context(ClientType::Desktop, true, None).await,
+                browse_id: channel_id,
+                params: Some("EgZ2aWRlb3MYASAAMAE"), // presumably selects the videos tab (base64 payload contains "videos") - TODO confirm
+            },
+        )
+        .await?;
+
+    let channel = serde_json::from_str::<Channel>(&resp)?;
+
+    let tab = channel
+        .contents
+        .two_column_browse_results_renderer
+        .tabs
+        .into_iter()
+        .next() // only the first tab of the response is used
+        .unwrap() // panics (instead of returning Err) if the response contains no tabs
+        .tab_renderer
+        .content
+        .rich_grid_renderer;
+
+    tab.contents.into_iter().for_each(|c| {
+        let lt = c.rich_item_renderer.content.video_renderer.length_text;
+        let duration = util::parse_video_length(&lt.simple_text).unwrap(); // panics on unparsable text
+        map.insert(lt.accessibility.accessibility_data.label, duration); // same label overwrites
+    });
+
+    Ok(())
+}
+
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
+enum PluralCategory { // local mirror of `intl_pluralrules::PluralCategory`; stored in a `HashSet` by the test
+    Zero,
+    One,
+    Two,
+    Few,
+    Many,
+    Other,
+}
+
+impl From<intl_pluralrules::PluralCategory> for PluralCategory { // one-to-one variant mapping
+    fn from(value: intl_pluralrules::PluralCategory) -> Self {
+        match value { // no wildcard arm: every listed upstream variant maps to its local twin
+            intl_pluralrules::PluralCategory::ZERO => Self::Zero,
+            intl_pluralrules::PluralCategory::ONE => Self::One,
+            intl_pluralrules::PluralCategory::TWO => Self::Two,
+            intl_pluralrules::PluralCategory::FEW => Self::Few,
+            intl_pluralrules::PluralCategory::MANY => Self::Many,
+            intl_pluralrules::PluralCategory::OTHER => Self::Other,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use std::collections::HashSet;
+    use std::io::BufReader;
+
+    use intl_pluralrules::{PluralRuleType, PluralRules};
+    use unic_langid::LanguageIdentifier;
+
+    fn split_duration(d: u32) -> (u32, u32) { // total seconds -> (minutes, seconds)
+        (d / 60, d % 60)
+    }
+
+    /// Verify that the samples cover every plural category of each language, for minutes and seconds
+    #[test]
+    fn check_video_duration_samples() {
+        let json_path = path!(*DICT_DIR / "video_duration_samples.json");
+        let json_file = File::open(json_path).unwrap();
+        let durations: CollectedDurations =
+            serde_json::from_reader(BufReader::new(json_file)).unwrap();
+        let mut failed = false; // collect all failures first so every language gets reported
+
+        for (lang, durations) in durations {
+            let ul: LanguageIdentifier =
+                lang.to_string().split('-').next().unwrap().parse().unwrap(); // part before first '-'
+
+            let pr = PluralRules::create(ul, PluralRuleType::CARDINAL).expect(&lang.to_string());
+
+            let mut plurals_m: HashSet<PluralCategory> = HashSet::new();
+            for n in 1..60 { // expected set: every category selected for 1..=59
+                plurals_m.insert(pr.select(n).unwrap().into());
+            }
+            let mut plurals_s = plurals_m.clone(); // seconds start from the same expected set
+
+            durations.values().for_each(|v| {
+                let (m, s) = split_duration(*v);
+                plurals_m.remove(&pr.select(m).unwrap().into()); // tick off covered categories
+                plurals_s.remove(&pr.select(s).unwrap().into());
+            });
+
+            if !plurals_m.is_empty() {
+                println!("{lang}: missing minutes {plurals_m:?}");
+                failed = true;
+            }
+
+            if !plurals_s.is_empty() {
+                println!("{lang}: missing seconds {plurals_s:?}");
+                failed = true;
+            }
+        }
+
+        assert!(!failed);
+    }
+}
--- a/codegen/src/download_testfiles.rs
+++ b/codegen/src/download_testfiles.rs
@ -5,6 +5,7 @@ use std::{
    sync::Mutex,
 };

+use path_macro::path;
 use rustypipe::{
    client::{ClientType, RustyPipe},
    param::{
@ -14,55 +15,54 @@ use rustypipe::{
    report::{Report, Reporter},
 };

-pub async fn download_testfiles(project_root: &Path) {
-    let mut testfiles = project_root.to_path_buf();
-    testfiles.push("testfiles");
+use crate::util::TESTFILES_DIR;

-    player(&testfiles).await;
-    player_model(&testfiles).await;
-    playlist(&testfiles).await;
-    playlist_cont(&testfiles).await;
-    video_details(&testfiles).await;
-    comments_top(&testfiles).await;
-    comments_latest(&testfiles).await;
-    recommendations(&testfiles).await;
-    channel_videos(&testfiles).await;
-    channel_shorts(&testfiles).await;
-    channel_livestreams(&testfiles).await;
-    channel_playlists(&testfiles).await;
-    channel_info(&testfiles).await;
-    channel_videos_cont(&testfiles).await;
-    channel_playlists_cont(&testfiles).await;
-    search(&testfiles).await;
-    search_cont(&testfiles).await;
-    search_playlists(&testfiles).await;
-    search_empty(&testfiles).await;
-    startpage(&testfiles).await;
-    startpage_cont(&testfiles).await;
-    trending(&testfiles).await;
+pub async fn download_testfiles() {
+    player().await;
+    player_model().await;
+    playlist().await;
+    playlist_cont().await;
+    video_details().await;
+    comments_top().await;
+    comments_latest().await;
+    recommendations().await;
+    channel_videos().await;
+    channel_shorts().await;
+    channel_livestreams().await;
+    channel_playlists().await;
+    channel_info().await;
+    channel_videos_cont().await;
+    channel_playlists_cont().await;
+    search().await;
+    search_cont().await;
+    search_playlists().await;
+    search_empty().await;
+    startpage().await;
+    startpage_cont().await;
+    trending().await;

-    music_playlist(&testfiles).await;
-    music_playlist_cont(&testfiles).await;
-    music_playlist_related(&testfiles).await;
-    music_album(&testfiles).await;
-    music_search(&testfiles).await;
-    music_search_tracks(&testfiles).await;
-    music_search_albums(&testfiles).await;
-    music_search_artists(&testfiles).await;
-    music_search_playlists(&testfiles).await;
-    music_search_cont(&testfiles).await;
-    music_search_suggestion(&testfiles).await;
-    music_artist(&testfiles).await;
-    music_details(&testfiles).await;
-    music_lyrics(&testfiles).await;
-    music_related(&testfiles).await;
-    music_radio(&testfiles).await;
-    music_radio_cont(&testfiles).await;
-    music_new_albums(&testfiles).await;
-    music_new_videos(&testfiles).await;
-    music_charts(&testfiles).await;
-    music_genres(&testfiles).await;
-    music_genre(&testfiles).await;
+    music_playlist().await;
+    music_playlist_cont().await;
+    music_playlist_related().await;
+    music_album().await;
+    music_search().await;
+    music_search_tracks().await;
+    music_search_albums().await;
+    music_search_artists().await;
+    music_search_playlists().await;
+    music_search_cont().await;
+    music_search_suggestion().await;
+    music_artist().await;
+    music_details().await;
+    music_lyrics().await;
+    music_related().await;
+    music_radio().await;
+    music_radio_cont().await;
+    music_new_albums().await;
+    music_new_videos().await;
+    music_charts().await;
+    music_genres().await;
+    music_genre().await;
 }

 const CLIENT_TYPES: [ClientType; 5] = [
@ -136,14 +136,12 @@ fn rp_testfile(json_path: &Path) -> RustyPipe {
        .build()
 }

-async fn player(testfiles: &Path) {
+async fn player() {
    let video_id = "pPvd8UxmSbQ";

    for client_type in CLIENT_TYPES {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("player");
-        json_path.push(format!("{client_type:?}_video.json").to_lowercase());
-
+        let json_path =
+            path!(*TESTFILES_DIR / "player" / format!("{client_type:?}_video.json").to_lowercase());
        if json_path.exists() {
            continue;
        }
@ -156,14 +154,12 @@ async fn player(testfiles: &Path) {
    }
 }

-async fn player_model(testfiles: &Path) {
+async fn player_model() {
    let rp = RustyPipe::builder().strict().build();

    for (name, id) in [("multilanguage", "tVWWp1PqDus"), ("hdr", "LXb3EKWsInQ")] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("player_model");
-        json_path.push(format!("{name}.json").to_lowercase());
-
+        let json_path =
+            path!(*TESTFILES_DIR / "player_model" / format!("{name}.json").to_lowercase());
        if json_path.exists() {
            continue;
        }
@ -180,15 +176,13 @@ async fn player_model(testfiles: &Path) {
    }
 }

-async fn playlist(testfiles: &Path) {
+async fn playlist() {
    for (name, id) in [
        ("short", "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"),
        ("long", "PL5dDx681T4bR7ZF1IuWzOv1omlRbE7PiJ"),
        ("nomusic", "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("playlist");
-        json_path.push(format!("playlist_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "playlist" / format!("playlist_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -198,10 +192,8 @@ async fn playlist(testfiles: &Path) {
    }
 }

-async fn playlist_cont(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("playlist");
-    json_path.push("playlist_cont.json");
+async fn playlist_cont() {
+    let json_path = path!(*TESTFILES_DIR / "playlist" / "playlist_cont.json");
    if json_path.exists() {
        return;
    }
@ -217,7 +209,7 @@ async fn playlist_cont(testfiles: &Path) {
    playlist.videos.next(rp.query()).await.unwrap().unwrap();
 }

-async fn video_details(testfiles: &Path) {
+async fn video_details() {
    for (name, id) in [
        ("music", "XuM2onMGvTI"),
        ("mv", "ZeerrnuLi5E"),
@ -226,9 +218,8 @@ async fn video_details(testfiles: &Path) {
        ("live", "86YLFOog4GM"),
        ("agegate", "HRKu0cvrr_o"),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("video_details");
-        json_path.push(format!("video_details_{name}.json"));
+        let json_path =
+            path!(*TESTFILES_DIR / "video_details" / format!("video_details_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -238,10 +229,8 @@ async fn video_details(testfiles: &Path) {
    }
 }

-async fn comments_top(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("video_details");
-    json_path.push("comments_top.json");
+async fn comments_top() {
+    let json_path = path!(*TESTFILES_DIR / "video_details" / "comments_top.json");
    if json_path.exists() {
        return;
    }
@ -258,10 +247,8 @@ async fn comments_top(testfiles: &Path) {
        .unwrap();
 }

-async fn comments_latest(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("video_details");
-    json_path.push("comments_latest.json");
+async fn comments_latest() {
+    let json_path = path!(*TESTFILES_DIR / "video_details" / "comments_latest.json");
    if json_path.exists() {
        return;
    }
@ -278,10 +265,8 @@ async fn comments_latest(testfiles: &Path) {
        .unwrap();
 }

-async fn recommendations(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("video_details");
-    json_path.push("recommendations.json");
+async fn recommendations() {
+    let json_path = path!(*TESTFILES_DIR / "video_details" / "recommendations.json");
    if json_path.exists() {
        return;
    }
@ -293,7 +278,7 @@ async fn recommendations(testfiles: &Path) {
    details.recommended.next(rp.query()).await.unwrap();
 }

-async fn channel_videos(testfiles: &Path) {
+async fn channel_videos() {
    for (name, id) in [
        ("base", "UC2DjFE7Xf11URZqWBigcVOQ"),
        ("music", "UC_vmjW5e1xEHhYjY2a0kK1A"), // YouTube Music channels have no videos
@ -302,9 +287,7 @@ async fn channel_videos(testfiles: &Path) {
        ("empty", "UCxBa895m48H5idw5li7h-0g"),
        ("upcoming", "UCcvfHa-GHSOHFAjU0-Ie57A"),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("channel");
-        json_path.push(format!("channel_videos_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "channel" / format!("channel_videos_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -314,10 +297,8 @@ async fn channel_videos(testfiles: &Path) {
    }
 }

-async fn channel_shorts(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("channel");
-    json_path.push("channel_shorts.json");
+async fn channel_shorts() {
+    let json_path = path!(*TESTFILES_DIR / "channel" / "channel_shorts.json");
    if json_path.exists() {
        return;
    }
@ -329,10 +310,8 @@ async fn channel_shorts(testfiles: &Path) {
        .unwrap();
 }

-async fn channel_livestreams(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("channel");
-    json_path.push("channel_livestreams.json");
+async fn channel_livestreams() {
+    let json_path = path!(*TESTFILES_DIR / "channel" / "channel_livestreams.json");
    if json_path.exists() {
        return;
    }
@ -344,10 +323,8 @@ async fn channel_livestreams(testfiles: &Path) {
        .unwrap();
 }

-async fn channel_playlists(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("channel");
-    json_path.push("channel_playlists.json");
+async fn channel_playlists() {
+    let json_path = path!(*TESTFILES_DIR / "channel" / "channel_playlists.json");
    if json_path.exists() {
        return;
    }
@ -359,10 +336,8 @@ async fn channel_playlists(testfiles: &Path) {
        .unwrap();
 }

-async fn channel_info(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("channel");
-    json_path.push("channel_info.json");
+async fn channel_info() {
+    let json_path = path!(*TESTFILES_DIR / "channel" / "channel_info.json");
    if json_path.exists() {
        return;
    }
@ -374,10 +349,8 @@ async fn channel_info(testfiles: &Path) {
        .unwrap();
 }

-async fn channel_videos_cont(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("channel");
-    json_path.push("channel_videos_cont.json");
+async fn channel_videos_cont() {
+    let json_path = path!(*TESTFILES_DIR / "channel" / "channel_videos_cont.json");
    if json_path.exists() {
        return;
    }
@ -393,10 +366,8 @@ async fn channel_videos_cont(testfiles: &Path) {
    videos.content.next(rp.query()).await.unwrap().unwrap();
 }

-async fn channel_playlists_cont(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("channel");
-    json_path.push("channel_playlists_cont.json");
+async fn channel_playlists_cont() {
+    let json_path = path!(*TESTFILES_DIR / "channel" / "channel_playlists_cont.json");
    if json_path.exists() {
        return;
    }
@ -412,10 +383,8 @@ async fn channel_playlists_cont(testfiles: &Path) {
    playlists.content.next(rp.query()).await.unwrap().unwrap();
 }

-async fn search(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("search");
-    json_path.push("default.json");
+async fn search() {
+    let json_path = path!(*TESTFILES_DIR / "search" / "default.json");
    if json_path.exists() {
        return;
    }
@ -424,10 +393,8 @@ async fn search(testfiles: &Path) {
    rp.query().search("doobydoobap").await.unwrap();
 }

-async fn search_cont(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("search");
-    json_path.push("cont.json");
+async fn search_cont() {
+    let json_path = path!(*TESTFILES_DIR / "search" / "cont.json");
    if json_path.exists() {
        return;
    }
@ -439,10 +406,8 @@ async fn search_cont(testfiles: &Path) {
    search.items.next(rp.query()).await.unwrap().unwrap();
 }

-async fn search_playlists(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("search");
-    json_path.push("playlists.json");
+async fn search_playlists() {
+    let json_path = path!(*TESTFILES_DIR / "search" / "playlists.json");
    if json_path.exists() {
        return;
    }
@ -454,10 +419,8 @@ async fn search_playlists(testfiles: &Path) {
        .unwrap();
 }

-async fn search_empty(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("search");
-    json_path.push("empty.json");
+async fn search_empty() {
+    let json_path = path!(*TESTFILES_DIR / "search" / "empty.json");
    if json_path.exists() {
        return;
    }
@ -474,10 +437,8 @@ async fn search_empty(testfiles: &Path) {
        .unwrap();
 }

-async fn startpage(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("trends");
-    json_path.push("startpage.json");
+async fn startpage() {
+    let json_path = path!(*TESTFILES_DIR / "trends" / "startpage.json");
    if json_path.exists() {
        return;
    }
@ -486,10 +447,8 @@ async fn startpage(testfiles: &Path) {
    rp.query().startpage().await.unwrap();
 }

-async fn startpage_cont(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("trends");
-    json_path.push("startpage_cont.json");
+async fn startpage_cont() {
+    let json_path = path!(*TESTFILES_DIR / "trends" / "startpage_cont.json");
    if json_path.exists() {
        return;
    }
@ -501,10 +460,8 @@ async fn startpage_cont(testfiles: &Path) {
    startpage.next(rp.query()).await.unwrap();
 }

-async fn trending(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("trends");
-    json_path.push("trending.json");
+async fn trending() {
+    let json_path = path!(*TESTFILES_DIR / "trends" / "trending_videos.json");
    if json_path.exists() {
        return;
    }
@ -513,15 +470,13 @@ async fn trending(testfiles: &Path) {
    rp.query().trending().await.unwrap();
 }

-async fn music_playlist(testfiles: &Path) {
+async fn music_playlist() {
    for (name, id) in [
        ("short", "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"),
        ("long", "PL5dDx681T4bR7ZF1IuWzOv1omlRbE7PiJ"),
        ("nomusic", "PL1J-6JOckZtE_P9Xx8D3b2O6w0idhuKBe"),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_playlist");
-        json_path.push(format!("playlist_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_playlist" / format!("playlist_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -531,10 +486,8 @@ async fn music_playlist(testfiles: &Path) {
    }
 }

-async fn music_playlist_cont(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_playlist");
-    json_path.push("playlist_cont.json");
+async fn music_playlist_cont() {
+    let json_path = path!(*TESTFILES_DIR / "music_playlist" / "playlist_cont.json");
    if json_path.exists() {
        return;
    }
@ -550,10 +503,8 @@ async fn music_playlist_cont(testfiles: &Path) {
    playlist.tracks.next(rp.query()).await.unwrap().unwrap();
 }

-async fn music_playlist_related(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_playlist");
-    json_path.push("playlist_related.json");
+async fn music_playlist_related() {
+    let json_path = path!(*TESTFILES_DIR / "music_playlist" / "playlist_related.json");
    if json_path.exists() {
        return;
    }
@ -574,7 +525,7 @@ async fn music_playlist_related(testfiles: &Path) {
        .unwrap();
 }

-async fn music_album(testfiles: &Path) {
+async fn music_album() {
    for (name, id) in [
        ("one_artist", "MPREb_nlBWQROfvjo"),
        ("various_artists", "MPREb_8QkDeEIawvX"),
@ -582,9 +533,7 @@ async fn music_album(testfiles: &Path) {
        ("description", "MPREb_PiyfuVl6aYd"),
        ("unavailable", "MPREb_AzuWg8qAVVl"),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_playlist");
-        json_path.push(format!("album_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_playlist" / format!("album_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -594,16 +543,14 @@ async fn music_album(testfiles: &Path) {
    }
 }

-async fn music_search(testfiles: &Path) {
+async fn music_search() {
    for (name, query) in [
        ("default", "black mamba"),
        ("typo", "liblingsmensch"),
        ("radio", "pop radio"),
        ("artist", "taylor swift"),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_search");
-        json_path.push(format!("main_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_search" / format!("main_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -613,7 +560,7 @@ async fn music_search(testfiles: &Path) {
    }
 }

-async fn music_search_tracks(testfiles: &Path) {
+async fn music_search_tracks() {
    for (name, query, videos) in [
        ("default", "black mamba", false),
        ("videos", "black mamba", true),
@ -624,9 +571,7 @@ async fn music_search_tracks(testfiles: &Path) {
            false,
        ),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_search");
-        json_path.push(format!("tracks_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_search" / format!("tracks_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -640,10 +585,8 @@ async fn music_search_tracks(testfiles: &Path) {
    }
 }

-async fn music_search_albums(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_search");
-    json_path.push("albums.json");
+async fn music_search_albums() {
+    let json_path = path!(*TESTFILES_DIR / "music_search" / "albums.json");
    if json_path.exists() {
        return;
    }
@ -652,10 +595,8 @@ async fn music_search_albums(testfiles: &Path) {
    rp.query().music_search_albums("black mamba").await.unwrap();
 }

-async fn music_search_artists(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_search");
-    json_path.push("artists.json");
+async fn music_search_artists() {
+    let json_path = path!(*TESTFILES_DIR / "music_search" / "artists.json");
    if json_path.exists() {
        return;
    }
@ -667,11 +608,9 @@ async fn music_search_artists(testfiles: &Path) {
        .unwrap();
 }

-async fn music_search_playlists(testfiles: &Path) {
+async fn music_search_playlists() {
    for (name, community) in [("ytm", false), ("community", true)] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_search");
-        json_path.push(format!("playlists_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_search" / format!("playlists_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -684,10 +623,8 @@ async fn music_search_playlists(testfiles: &Path) {
    }
 }

-async fn music_search_cont(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_search");
-    json_path.push("tracks_cont.json");
+async fn music_search_cont() {
+    let json_path = path!(*TESTFILES_DIR / "music_search" / "tracks_cont.json");
    if json_path.exists() {
        return;
    }
@ -699,11 +636,9 @@ async fn music_search_cont(testfiles: &Path) {
    res.items.next(rp.query()).await.unwrap().unwrap();
 }

-async fn music_search_suggestion(testfiles: &Path) {
+async fn music_search_suggestion() {
    for (name, query) in [("default", "t"), ("empty", "reujbhevmfndxnjrze")] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_search");
-        json_path.push(format!("suggestion_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_search" / format!("suggestion_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -713,7 +648,7 @@ async fn music_search_suggestion(testfiles: &Path) {
    }
 }

-async fn music_artist(testfiles: &Path) {
+async fn music_artist() {
    for (name, id, all_albums) in [
        ("default", "UClmXPfaYhXOYsNn_QUyheWQ", true),
        ("no_more_albums", "UC_vmjW5e1xEHhYjY2a0kK1A", true),
@ -722,9 +657,7 @@ async fn music_artist(testfiles: &Path) {
        ("only_more_singles", "UC0aXrjVxG5pZr99v77wZdPQ", true),
        ("secondary_channel", "UCC9192yGQD25eBZgFZ84MPw", false),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_artist");
-        json_path.push(format!("artist_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_artist" / format!("artist_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -734,11 +667,9 @@ async fn music_artist(testfiles: &Path) {
    }
 }

-async fn music_details(testfiles: &Path) {
+async fn music_details() {
    for (name, id) in [("mv", "ZeerrnuLi5E"), ("track", "7nigXQS1Xb0")] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_details");
-        json_path.push(format!("details_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_details" / format!("details_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -748,10 +679,8 @@ async fn music_details(testfiles: &Path) {
    }
 }

-async fn music_lyrics(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_details");
-    json_path.push("lyrics.json");
+async fn music_lyrics() {
+    let json_path = path!(*TESTFILES_DIR / "music_details" / "lyrics.json");
    if json_path.exists() {
        return;
    }
@ -766,10 +695,8 @@ async fn music_lyrics(testfiles: &Path) {
        .unwrap();
 }

-async fn music_related(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_details");
-    json_path.push("related.json");
+async fn music_related() {
+    let json_path = path!(*TESTFILES_DIR / "music_details" / "related.json");
    if json_path.exists() {
        return;
    }
@ -784,11 +711,9 @@ async fn music_related(testfiles: &Path) {
        .unwrap();
 }

-async fn music_radio(testfiles: &Path) {
+async fn music_radio() {
    for (name, id) in [("mv", "RDAMVMZeerrnuLi5E"), ("track", "RDAMVM7nigXQS1Xb0")] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_details");
-        json_path.push(format!("radio_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_details" / format!("radio_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -798,10 +723,8 @@ async fn music_radio(testfiles: &Path) {
    }
 }

-async fn music_radio_cont(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_details");
-    json_path.push("radio_cont.json");
+async fn music_radio_cont() {
+    let json_path = path!(*TESTFILES_DIR / "music_details" / "radio_cont.json");
    if json_path.exists() {
        return;
    }
@ -813,10 +736,8 @@ async fn music_radio_cont(testfiles: &Path) {
    res.next(rp.query()).await.unwrap().unwrap();
 }

-async fn music_new_albums(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_new");
-    json_path.push("albums_default.json");
+async fn music_new_albums() {
+    let json_path = path!(*TESTFILES_DIR / "music_new" / "albums_default.json");
    if json_path.exists() {
        return;
    }
@ -825,10 +746,8 @@ async fn music_new_albums(testfiles: &Path) {
    rp.query().music_new_albums().await.unwrap();
 }

-async fn music_new_videos(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_new");
-    json_path.push("videos_default.json");
+async fn music_new_videos() {
+    let json_path = path!(*TESTFILES_DIR / "music_new" / "videos_default.json");
    if json_path.exists() {
        return;
    }
@ -837,11 +756,9 @@ async fn music_new_videos(testfiles: &Path) {
    rp.query().music_new_videos().await.unwrap();
 }

-async fn music_charts(testfiles: &Path) {
+async fn music_charts() {
    for (name, country) in [("global", Some(Country::Zz)), ("US", Some(Country::Us))] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_charts");
-        json_path.push(&format!("charts_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_charts" / format!("charts_{name}.json"));
        if json_path.exists() {
            continue;
        }
@ -851,10 +768,8 @@ async fn music_charts(testfiles: &Path) {
    }
 }

-async fn music_genres(testfiles: &Path) {
-    let mut json_path = testfiles.to_path_buf();
-    json_path.push("music_genres");
-    json_path.push("genres.json");
+async fn music_genres() {
+    let json_path = path!(*TESTFILES_DIR / "music_genres" / "genres.json");
    if json_path.exists() {
        return;
    }
@ -863,14 +778,12 @@ async fn music_genres(testfiles: &Path) {
    rp.query().music_genres().await.unwrap();
 }

-async fn music_genre(testfiles: &Path) {
+async fn music_genre() {
    for (name, id) in [
        ("default", "ggMPOg1uX1lMbVZmbzl6NlJ3"),
        ("mood", "ggMPOg1uX1JOQWZFeDByc2Jm"),
    ] {
-        let mut json_path = testfiles.to_path_buf();
-        json_path.push("music_genres");
-        json_path.push(&format!("genre_{name}.json"));
+        let json_path = path!(*TESTFILES_DIR / "music_genres" / format!("genre_{name}.json"));
        if json_path.exists() {
            continue;
        }
--- a/codegen/src/gen_dictionary.rs
+++ b/codegen/src/gen_dictionary.rs
@ -1,13 +1,13 @@
 use std::fmt::Write;
-use std::path::Path;

 use once_cell::sync::Lazy;
+use path_macro::path;
 use regex::Regex;
-use rustypipe::timeago::TimeUnit;

-use crate::util;
-
-const TARGET_PATH: &str = "src/util/dictionary.rs";
+use crate::{
+    model::TimeUnit,
+    util::{self, SRC_DIR},
+};

 fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
    static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());
@ -30,8 +30,8 @@ fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
    }
 }

-pub fn generate_dictionary(project_root: &Path) {
-    let dict = util::read_dict(project_root);
+pub fn generate_dictionary() {
+    let dict = util::read_dict();

    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
 // See codegen/gen_dictionary.rs for the generation code.
@ -169,7 +169,6 @@ pub(crate) fn entry(lang: Language) -> Entry {

    let code = format!("{code_head}\n{code_timeago_tokens}");

-    let mut target_path = project_root.to_path_buf();
-    target_path.push(TARGET_PATH);
+    let target_path = path!(*SRC_DIR / "util" / "dictionary.rs");
    std::fs::write(target_path, code).unwrap();
 }
--- a/codegen/src/gen_locales.rs
+++ b/codegen/src/gen_locales.rs
@ -1,14 +1,15 @@
 use std::collections::BTreeMap;
 use std::fmt::Write;
-use std::path::Path;

+use path_macro::path;
 use reqwest::header;
 use reqwest::Client;
 use serde::Deserialize;
 use serde_with::serde_as;
 use serde_with::VecSkipError;

-use crate::util::Text;
+use crate::model::Text;
+use crate::util::SRC_DIR;

 #[serde_as]
 #[derive(Clone, Debug, Deserialize)]
@ -137,7 +138,7 @@ struct LanguageCountryCommand {
    hl: String,
 }

-pub async fn generate_locales(project_root: &Path) {
+pub async fn generate_locales() {
    let (languages, countries) = get_locales().await;

    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
@ -288,8 +289,7 @@ pub enum Country {
        "{code_head}\n{code_langs}\n{code_countries}\n{code_lang_array}\n{code_country_array}\n{code_lang_names}\n{code_country_names}\n{code_foot}"
    );

-    let mut target_path = project_root.to_path_buf();
-    target_path.push("src/param/locale.rs");
+    let target_path = path!(*SRC_DIR / "param" / "locale.rs");
    std::fs::write(target_path, code).unwrap();
 }

--- a/codegen/src/main.rs
+++ b/codegen/src/main.rs
@ -2,21 +2,19 @@ mod abtest;
 mod collect_album_types;
 mod collect_large_numbers;
 mod collect_playlist_dates;
+mod collect_video_durations;
 mod download_testfiles;
 mod gen_dictionary;
 mod gen_locales;
+mod model;
 mod util;

-use std::path::PathBuf;
-
 use clap::{Parser, Subcommand};

 #[derive(Parser)]
 struct Cli {
    #[clap(subcommand)]
    command: Commands,
-    #[clap(short = 'd', default_value = "..")]
-    project_root: PathBuf,
    #[clap(short, default_value = "8")]
    concurrency: usize,
 }
@ -26,6 +24,7 @@ enum Commands {
    CollectPlaylistDates,
    CollectLargeNumbers,
    CollectAlbumTypes,
+    CollectVideoDurations,
    ParsePlaylistDates,
    ParseLargeNumbers,
    ParseAlbumTypes,
@ -47,28 +46,25 @@ async fn main() {

    match cli.command {
        Commands::CollectPlaylistDates => {
-            collect_playlist_dates::collect_dates(&cli.project_root, cli.concurrency).await;
+            collect_playlist_dates::collect_dates(cli.concurrency).await;
        }
        Commands::CollectLargeNumbers => {
-            collect_large_numbers::collect_large_numbers(&cli.project_root, cli.concurrency).await;
+            collect_large_numbers::collect_large_numbers(cli.concurrency).await;
        }
        Commands::CollectAlbumTypes => {
-            collect_album_types::collect_album_types(&cli.project_root, cli.concurrency).await;
+            collect_album_types::collect_album_types(cli.concurrency).await;
        }
-        Commands::ParsePlaylistDates => {
-            collect_playlist_dates::write_samples_to_dict(&cli.project_root)
+        Commands::CollectVideoDurations => {
+            collect_video_durations::collect_video_durations(cli.concurrency).await;
        }
-        Commands::ParseLargeNumbers => {
-            collect_large_numbers::write_samples_to_dict(&cli.project_root)
-        }
-        Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(&cli.project_root),
+        Commands::ParsePlaylistDates => collect_playlist_dates::write_samples_to_dict(),
+        Commands::ParseLargeNumbers => collect_large_numbers::write_samples_to_dict(),
+        Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(),
        Commands::GenLocales => {
-            gen_locales::generate_locales(&cli.project_root).await;
-        }
-        Commands::GenDict => gen_dictionary::generate_dictionary(&cli.project_root),
-        Commands::DownloadTestfiles => {
-            download_testfiles::download_testfiles(&cli.project_root).await
+            gen_locales::generate_locales().await;
        }
+        Commands::GenDict => gen_dictionary::generate_dictionary(),
+        Commands::DownloadTestfiles => download_testfiles::download_testfiles().await,
        Commands::AbTest { id, n } => {
            match id {
                Some(id) => {
--- a/codegen/src/model.rs
+++ b/codegen/src/model.rs
@ -0,0 +1,260 @@
+use std::collections::BTreeMap;
+
+use rustypipe::{client::YTContext, model::AlbumType, param::Language};
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DefaultOnError, VecSkipError};
+
+/// Per-language dictionary entry holding the tokens used to parse localized text
+/// (relative dates, month names, abbreviated numbers and album types).
+///
+/// Every field is optional when deserializing: `#[serde(default)]` fills a
+/// missing field with its `Default` value.
+#[derive(Debug, Default, Serialize, Deserialize)]
+#[serde(default)]
+pub struct DictEntry {
+    /// List of languages that should be treated equally (e.g. EnUs/EnGb/EnIn)
+    pub equivalent: Vec<Language>,
+    /// Should the language be parsed by character instead of by word?
+    /// (e.g. Chinese/Japanese)
+    pub by_char: bool,
+    /// Tokens for parsing timeago strings.
+    ///
+    /// Format: Parsed token -> \[Quantity\] Identifier
+    ///
+    /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
+    /// `h`(our), `m`(inute), `s`(econd)
+    pub timeago_tokens: BTreeMap<String, String>,
+    /// Order in which to parse numeric date components. Formatted as
+    /// a string of date identifiers (Y, M, D).
+    ///
+    /// Examples:
+    ///
+    /// - 03.01.2020 => `"DMY"`
+    /// - Jan 3, 2020 => `"DY"`
+    pub date_order: String,
+    /// Tokens for parsing month names.
+    ///
+    /// Format: Parsed token -> Month number (starting from 1)
+    pub months: BTreeMap<String, u8>,
+    /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
+    ///
+    /// Format: Parsed token -> \[Quantity\] Identifier
+    pub timeago_nd_tokens: BTreeMap<String, String>,
+    /// Are commas (instead of points) used as decimal separators?
+    pub comma_decimal: bool,
+    /// Tokens for parsing decimal prefixes (K, M, B, ...)
+    ///
+    /// Format: Parsed token -> decimal power
+    pub number_tokens: BTreeMap<String, u8>,
+    /// Tokens for parsing number strings with no digits (e.g. "No videos")
+    ///
+    /// Format: Parsed token -> value
+    pub number_nd_tokens: BTreeMap<String, u8>,
+    /// Names of album types (Album, Single, ...)
+    ///
+    /// Format: Parsed text -> Album type
+    pub album_types: BTreeMap<String, AlbumType>,
+}
+
+/// Parsed time unit
+///
+/// Variants are (de)serialized under their lowercase names (e.g. `"second"`).
+/// The derived ordering follows declaration order, i.e. from the shortest unit
+/// (`Second`) to the longest (`Year`).
+#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[serde(rename_all = "lowercase")]
+pub enum TimeUnit {
+    Second,
+    Minute,
+    Hour,
+    Day,
+    Week,
+    Month,
+    Year,
+}
+
+/// Serialize-only request body made up of a client context, a browse ID and
+/// optional parameters. Keys are emitted in camelCase (e.g. `browseId`).
+///
+/// NOTE(review): presumably the payload for YouTube's `browse` endpoint — confirm
+/// against the call sites.
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct QBrowse<'a> {
+    /// Client context sent along with the request
+    pub context: YTContext<'a>,
+    /// ID of the item to browse
+    pub browse_id: &'a str,
+    /// Additional parameters; the key is omitted from the output when `None`
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub params: Option<&'a str>,
+}
+
+/// Serialize-only request body made up of a client context and a continuation
+/// token. Keys are emitted in camelCase.
+///
+/// NOTE(review): presumably used to fetch the next page of a previous
+/// response — confirm against the call sites.
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct QCont<'a> {
+    /// Client context sent along with the request
+    pub context: YTContext<'a>,
+    /// Continuation token
+    pub continuation: &'a str,
+}
+
+/// Text object that is split into a list of `runs`.
+#[derive(Clone, Debug, Deserialize)]
+pub struct TextRuns {
+    /// Individual text fragments, in the order they appear in the response
+    pub runs: Vec<Text>,
+}
+
+/// Single text value.
+///
+/// The string is read from the `text` key; `simpleText` is accepted as an
+/// alternative key name.
+#[derive(Clone, Debug, Deserialize)]
+pub struct Text {
+    #[serde(alias = "simpleText")]
+    pub text: String,
+}
+
+/// Top-level shape of a deserialized channel response; only the `contents` and
+/// `header` keys are read.
+///
+/// NOTE(review): presumably the response of a YouTube channel browse request —
+/// confirm against the call sites.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Channel {
+    pub contents: Contents,
+    pub header: ChannelHeader,
+}
+
+/// Wrapper object around the `c4TabbedHeaderRenderer` key of a [`Channel`] header.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ChannelHeader {
+    pub c4_tabbed_header_renderer: HeaderRenderer,
+}
+
+/// Channel header data; only the subscriber count text is read.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct HeaderRenderer {
+    /// Textual subscriber count (key `subscriberCountText`)
+    pub subscriber_count_text: Text,
+}
+
+/// Wrapper object around the `twoColumnBrowseResultsRenderer` key of a
+/// [`Channel`] response.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Contents {
+    pub two_column_browse_results_renderer: TabsRenderer,
+}
+
+/// List of tabs contained in a [`Contents`] object.
+#[serde_as]
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TabsRenderer {
+    /// Tabs that match the expected shape; entries that fail to deserialize
+    /// are skipped (`VecSkipError`) instead of failing the whole response.
+    #[serde_as(as = "VecSkipError<_>")]
+    pub tabs: Vec<TabRendererWrap>,
+}
+
+/// Wrapper object around the `tabRenderer` key of a tab entry.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TabRendererWrap {
+    pub tab_renderer: TabRenderer,
+}
+
+/// Single tab; only its `content` is read.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TabRenderer {
+    pub content: RichGridRendererWrap,
+}
+
+/// Wrapper object around the `richGridRenderer` key of a tab's content.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RichGridRendererWrap {
+    pub rich_grid_renderer: RichGridRenderer,
+}
+
+/// Grid of items with an optional header.
+#[serde_as]
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RichGridRenderer {
+    /// Grid items; entries that fail to deserialize are skipped (`VecSkipError`).
+    #[serde_as(as = "VecSkipError<_>")]
+    pub contents: Vec<RichItemRendererWrap>,
+    /// Grid header. `None` if the key is missing (`#[serde(default)]`) or if
+    /// it cannot be deserialized (`DefaultOnError`).
+    #[serde(default)]
+    #[serde_as(as = "DefaultOnError")]
+    pub header: Option<RichGridHeader>,
+}
+
+/// Wrapper object around the `richItemRenderer` key of a grid item.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RichItemRendererWrap {
+    pub rich_item_renderer: RichItemRenderer,
+}
+
+/// Single grid item; only its `content` is read and it has to be a video.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RichItemRenderer {
+    pub content: VideoRendererWrap,
+}
+
+/// Wrapper object around the `videoRenderer` key of a grid item's content.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct VideoRendererWrap {
+    pub video_renderer: VideoRenderer,
+}
+
+/// Textual metadata of a video item: view counts and length.
+///
+/// All three keys are required; an item lacking one of them fails to
+/// deserialize.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct VideoRenderer {
+    /// `24,194 views`
+    pub view_count_text: Text,
+    /// `19K views`
+    pub short_view_count_text: Text,
+    /// Video length in both its spoken and its numeric form
+    pub length_text: LengthText,
+}
+
+/// Video length, available both as an accessibility label and as a short
+/// numeric string.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct LengthText {
+    /// `18 minutes, 26 seconds`
+    pub accessibility: Accessibility,
+    /// `18:26`
+    pub simple_text: String,
+}
+
+/// Wrapper object around the `accessibilityData` key.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Accessibility {
+    pub accessibility_data: AccessibilityData,
+}
+
+/// Accessibility information; only the `label` string is read.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct AccessibilityData {
+    /// Accessibility label text
+    pub label: String,
+}
+
+/// Wrapper object around the `feedFilterChipBarRenderer` key of a grid header.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RichGridHeader {
+    pub feed_filter_chip_bar_renderer: ChipBar,
+}
+
+/// List of chips contained in a grid header.
+///
+/// Unlike the other lists in this module, the chips are deserialized strictly:
+/// a single malformed chip makes the whole chip bar fail to deserialize.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ChipBar {
+    pub contents: Vec<Chip>,
+}
+
+/// Wrapper object around the `chipCloudChipRenderer` key of a chip bar entry.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Chip {
+    pub chip_cloud_chip_renderer: ChipRenderer,
+}
+
+/// Single chip; only its navigation endpoint is read.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ChipRenderer {
+    pub navigation_endpoint: NavigationEndpoint,
+}
+
+/// Wrapper object around the `continuationCommand` key of a navigation endpoint.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct NavigationEndpoint {
+    pub continuation_command: ContinuationCommand,
+}
+
+/// Continuation command; only its `token` string is read.
+///
+/// NOTE(review): presumably the token that is sent back as
+/// `QCont::continuation` — confirm against the call sites.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ContinuationCommand {
+    pub token: String,
+}
+
+/// Top-level shape of a deserialized continuation response; only the
+/// `onResponseReceivedActions` list is read.
+///
+/// NOTE(review): no field carries a `serde_as` attribute, so `#[serde_as]`
+/// looks redundant on this struct — confirm before removing.
+#[serde_as]
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ContinuationResponse {
+    /// Actions of the response; deserialized strictly (no entries are skipped)
+    pub on_response_received_actions: Vec<ContinuationAction>,
+}
+
+/// Wrapper object around the `reloadContinuationItemsCommand` key of a
+/// continuation action.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ContinuationAction {
+    pub reload_continuation_items_command: ContinuationItemsWrap,
+}
+
+/// List of items delivered by a continuation action.
+#[serde_as]
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ContinuationItemsWrap {
+    /// Items that match the expected shape; entries that fail to deserialize
+    /// are skipped (`VecSkipError`).
+    #[serde_as(as = "VecSkipError<_>")]
+    pub continuation_items: Vec<RichItemRendererWrap>,
+}
--- a/codegen/src/util.rs
+++ b/codegen/src/util.rs
@ -1,120 +1,50 @@
-use std::{
-    collections::BTreeMap,
-    fs::File,
-    io::BufReader,
-    path::{Path, PathBuf},
-    str::FromStr,
-};
+use std::{collections::BTreeMap, fs::File, io::BufReader, path::PathBuf, str::FromStr};

 use once_cell::sync::Lazy;
 use path_macro::path;
-use rustypipe::{client::YTContext, model::AlbumType, param::Language};
+use regex::Regex;
+use rustypipe::param::Language;
 use serde::{Deserialize, Serialize};

-static DICT_PATH: Lazy<PathBuf> = Lazy::new(|| path!("testfiles" / "dict" / "dictionary.json"));
-static DICT_OVERRIDE_PATH: Lazy<PathBuf> =
-    Lazy::new(|| path!("testfiles" / "dict" / "dictionary_override.json"));
+use crate::model::DictEntry;
+
+/// Path of the `testfiles` directory.
+///
+/// Resolved lazily on first access as `<CARGO_MANIFEST_DIR>/../testfiles`, where
+/// `CARGO_MANIFEST_DIR` is baked in at compile time, and then canonicalized.
+///
+/// # Panics
+///
+/// The first access panics if the path cannot be canonicalized (e.g. because the
+/// directory does not exist).
+pub static TESTFILES_DIR: Lazy<PathBuf> = Lazy::new(|| {
+    path!(env!("CARGO_MANIFEST_DIR") / ".." / "testfiles")
+        .canonicalize()
+        .unwrap()
+});
+/// Path of the `dict` directory, located directly inside [`TESTFILES_DIR`].
+///
+/// Accessing it initializes `TESTFILES_DIR` as well; the existence of the `dict`
+/// directory itself is not checked here.
+pub static DICT_DIR: Lazy<PathBuf> = Lazy::new(|| path!(*TESTFILES_DIR / "dict"));
+/// Path of the `src` directory, built as `<CARGO_MANIFEST_DIR>/../src`.
+///
+/// Unlike [`TESTFILES_DIR`] this path is not canonicalized, so it still contains
+/// the `..` component and is never checked for existence.
+pub static SRC_DIR: Lazy<PathBuf> = Lazy::new(|| path!(env!("CARGO_MANIFEST_DIR") / ".." / "src"));

 type Dictionary = BTreeMap<Language, DictEntry>;
 type DictionaryOverride = BTreeMap<Language, DictOverrideEntry>;

 #[derive(Debug, Default, Serialize, Deserialize)]
 #[serde(default)]
-pub struct DictEntry {
-    /// List of languages that should be treated equally (e.g. EnUs/EnGb/EnIn)
-    pub equivalent: Vec<Language>,
-    /// Should the language be parsed by character instead of by word?
-    /// (e.g. Chinese/Japanese)
-    pub by_char: bool,
-    /// Tokens for parsing timeago strings.
-    ///
-    /// Format: Parsed token -> \[Quantity\] Identifier
-    ///
-    /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
-    /// `h`(our), `m`(inute), `s`(econd)
-    pub timeago_tokens: BTreeMap<String, String>,
-    /// Order in which to parse numeric date components. Formatted as
-    /// a string of date identifiers (Y, M, D).
-    ///
-    /// Examples:
-    ///
-    /// - 03.01.2020 => `"DMY"`
-    /// - Jan 3, 2020 => `"DY"`
-    pub date_order: String,
-    /// Tokens for parsing month names.
-    ///
-    /// Format: Parsed token -> Month number (starting from 1)
-    pub months: BTreeMap<String, u8>,
-    /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
-    ///
-    /// Format: Parsed token -> \[Quantity\] Identifier
-    pub timeago_nd_tokens: BTreeMap<String, String>,
-    /// Are commas (instead of points) used as decimal separators?
-    pub comma_decimal: bool,
-    /// Tokens for parsing decimal prefixes (K, M, B, ...)
-    ///
-    /// Format: Parsed token -> decimal power
-    pub number_tokens: BTreeMap<String, u8>,
-    /// Tokens for parsing number strings with no digits (e.g. "No videos")
-    ///
-    /// Format: Parsed token -> value
-    pub number_nd_tokens: BTreeMap<String, u8>,
-    /// Names of album types (Album, Single, ...)
-    ///
-    /// Format: Parsed text -> Album type
-    pub album_types: BTreeMap<String, AlbumType>,
+struct DictOverrideEntry {
+    number_tokens: BTreeMap<String, Option<u8>>,
+    number_nd_tokens: BTreeMap<String, Option<u8>>,
 }

-#[derive(Debug, Default, Serialize, Deserialize)]
-#[serde(default)]
-pub struct DictOverrideEntry {
-    pub number_tokens: BTreeMap<String, Option<u8>>,
-    pub number_nd_tokens: BTreeMap<String, Option<u8>>,
-}
-
-#[derive(Debug, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct QBrowse<'a> {
-    pub context: YTContext<'a>,
-    pub browse_id: &'a str,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub params: Option<&'a str>,
-}
-
-#[derive(Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct QCont<'a> {
-    pub context: YTContext<'a>,
-    pub continuation: &'a str,
-}
-
-#[derive(Clone, Debug, Deserialize)]
-pub struct TextRuns {
-    pub runs: Vec<Text>,
-}
-
-#[derive(Clone, Debug, Deserialize)]
-pub struct Text {
-    #[serde(alias = "simpleText")]
-    pub text: String,
-}
-
-pub fn read_dict(project_root: &Path) -> Dictionary {
-    let json_path = path!(project_root / *DICT_PATH);
+pub fn read_dict() -> Dictionary {
+    let json_path = path!(*DICT_DIR / "dictionary.json");
    let json_file = File::open(json_path).unwrap();
    serde_json::from_reader(BufReader::new(json_file)).unwrap()
 }

-pub fn read_dict_override(project_root: &Path) -> DictionaryOverride {
-    let json_path = path!(project_root / *DICT_OVERRIDE_PATH);
+fn read_dict_override() -> DictionaryOverride {
+    let json_path = path!(*DICT_DIR / "dictionary_override.json");
    let json_file = File::open(json_path).unwrap();
    serde_json::from_reader(BufReader::new(json_file)).unwrap()
 }

-pub fn write_dict(project_root: &Path, dict: Dictionary) {
-    let dict_override = read_dict_override(project_root);
+pub fn write_dict(dict: Dictionary) {
+    let dict_override = read_dict_override();

-    let json_path = path!(project_root / *DICT_PATH);
+    let json_path = path!(*DICT_DIR / "dictionary.json");
    let json_file = File::create(json_path).unwrap();

    fn apply_map<K: Clone + Ord, V: Clone>(map: &mut BTreeMap<K, V>, or: &BTreeMap<K, Option<V>>) {
@ -251,3 +181,26 @@ pub fn parse_largenum_en(string: &str) -> Option<u64> {

    num.checked_mul((10_u64).checked_pow(exp.try_into().ok()?)?)
 }
+
+/// Parse textual video length (e.g. `0:49`, `2:02` or `1:48:18`)
+/// and return the duration in seconds.
+///
+/// The pattern is not anchored: the first `[h:]mm:ss` group found anywhere in
+/// `text` is used. Both `:` and `.` are accepted as separators.
+///
+/// Returns `None` if `text` contains no such group, if a numeric component does
+/// not fit into a `u32`, or if the total number of seconds overflows a `u32`.
+pub fn parse_video_length(text: &str) -> Option<u32> {
+    static VIDEO_LENGTH_REGEX: Lazy<Regex> =
+        Lazy::new(|| Regex::new(r#"(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})"#).unwrap());
+
+    let cap = VIDEO_LENGTH_REGEX.captures(text)?;
+
+    // A group that did not take part in the match (the optional hours) counts as 0.
+    // A group that matched but cannot be parsed (too many digits for a `u32`)
+    // rejects the input instead of being silently treated as 0.
+    let component = |idx: usize| -> Option<u32> {
+        match cap.get(idx) {
+            Some(m) => m.as_str().parse::<u32>().ok(),
+            None => Some(0),
+        }
+    };
+    let hrs = component(1)?;
+    let min = component(2)?;
+    let sec = component(3)?;
+
+    // `min` and `sec` are at most two digits long, so only the hour term (and the
+    // sums including it) can overflow.
+    hrs.checked_mul(3600)?
+        .checked_add(min * 60)?
+        .checked_add(sec)
+}