Merge branch 'intl-tests'

2023-05-07 15:06:27 +02:00 · 2023-05-07 15:06:27 +02:00 · b3331b36a7
commit b3331b36a7
parent 25025ef701 781064218d
69 changed files with 55604 additions and 32189 deletions
--- a/src/util/dictionary.rs
+++ b/src/util/dictionary.rs
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@ -2,6 +2,7 @@ mod date;
 mod protobuf;

 pub mod dictionary;
+pub mod timeago;

 pub use date::{now_sec, shift_months, shift_years};
 pub use protobuf::{string_from_pb, ProtoBuilder};
@ -19,7 +20,7 @@ use rand::Rng;
 use regex::Regex;
 use url::Url;

-use crate::{error::Error, param::Language};
+use crate::{error::Error, param::Language, serializer::text::TextComponent};

 pub static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap());
 pub static CHANNEL_ID_REGEX: Lazy<Regex> =
@ -34,8 +35,6 @@ pub static VANITY_PATH_REGEX: Lazy<Regex> = Lazy::new(|| {

 /// Separator string for YouTube Music subtitles
 pub const DOT_SEPARATOR: &str = " • ";
-/// YouTube Music name (author of official playlists)
-pub const YT_MUSIC_NAME: &str = "YouTube Music";
 pub const VARIOUS_ARTISTS: &str = "Various Artists";
 pub const PLAYLIST_ID_ALBUM_PREFIX: &str = "OLAK";

@ -143,7 +142,7 @@ where
 /// and return the duration in seconds.
 pub fn parse_video_length(text: &str) -> Option<u32> {
    static VIDEO_LENGTH_REGEX: Lazy<Regex> =
-        Lazy::new(|| Regex::new(r#"(?:(\d+):)?(\d{1,2}):(\d{2})"#).unwrap());
+        Lazy::new(|| Regex::new(r#"(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})"#).unwrap());
    VIDEO_LENGTH_REGEX.captures(text).map(|cap| {
        let hrs = cap
            .get(1)
@ -272,56 +271,114 @@ impl<T> TryRemove<T> for Vec<T> {
    }
 }

+/// Report whether a channel name looks like the "YouTube Music" account
+/// (the author of original YouTube music playlists).
+///
+/// Only a plain [`TextComponent::Text`] whose content starts with `"YouTube"`
+/// is accepted; anything else yields `false`.
+pub(crate) fn is_ytm(text: &TextComponent) -> bool {
+    matches!(text, TextComponent::Text { text } if text.starts_with("YouTube"))
+}
+
+/// Tell whether text in `lang` has to be tokenized character by character
+/// (Japanese and the Chinese variants) instead of being split into words.
+pub fn lang_by_char(lang: Language) -> bool {
+    [
+        Language::Ja,
+        Language::ZhCn,
+        Language::ZhHk,
+        Language::ZhTw,
+    ]
+    .contains(&lang)
+}
+
 /// Parse a large, textual number (e.g. `1.4M subscribers`, `22K views`)
 pub fn parse_large_numstr<F>(string: &str, lang: Language) -> Option<F>
 where
    F: TryFrom<u64>,
 {
+    // Special case for Gujarati: the "no views" text does not contain
+    // any parseable tokens: the 2 words occur in any view count text.
+    // This may be a translation error.
+    if lang == Language::Gu && string == "જોવાયાની સંખ્યા" {
+        return 0.try_into().ok();
+    }
+
    let dict_entry = dictionary::entry(lang);
+    let by_char = lang_by_char(lang) || lang == Language::Ko;
    let decimal_point = match dict_entry.comma_decimal {
        true => ',',
        false => '.',
    };

-    let (num, mut exp, filtered) = {
-        let mut buf = String::new();
-        let mut filtered = String::new();
-        let mut exp = 0;
-        let mut after_point = false;
-        for c in string.chars() {
-            if c.is_ascii_digit() {
-                buf.push(c);
+    let mut digits = String::new();
+    let mut filtered = String::new();
+    let mut exp = 0;
+    let mut after_point = false;

-                if after_point {
-                    exp -= 1;
-                }
-            } else if c == decimal_point {
-                after_point = true;
-            } else if !matches!(c, '\u{200b}' | '.' | ',') {
-                filtered.push(c);
+    for c in string.chars() {
+        if c.is_ascii_digit() {
+            digits.push(c);
+
+            if after_point {
+                exp -= 1;
            }
+        } else if c == decimal_point {
+            after_point = true;
+        } else if !matches!(
+            c,
+            '\u{200b}' | '\u{202b}' | '\u{202c}' | '\u{202e}' | '\u{200e}' | '\u{200f}' | '.' | ','
+        ) {
+            c.to_lowercase().for_each(|c| filtered.push(c));
        }
-        (buf.parse::<u64>().ok()?, exp, filtered)
-    };
-
-    let lookup_token = |token: &str| match token {
-        "K" | "k" => Some(3),
-        _ => dict_entry.number_tokens.get(token).map(|t| *t as i32),
-    };
-
-    if dict_entry.by_char {
-        exp += filtered
-            .chars()
-            .filter_map(|token| lookup_token(&token.to_string()))
-            .sum::<i32>();
-    } else {
-        exp += filtered
-            .split_whitespace()
-            .filter_map(lookup_token)
-            .sum::<i32>();
    }

-    F::try_from(num.checked_mul((10_u64).checked_pow(exp.try_into().ok()?)?)?).ok()
+    if digits.is_empty() {
+        if by_char {
+            filtered
+                .chars()
+                .find_map(|c| dict_entry.number_nd_tokens.get(&c.to_string()))
+                .and_then(|n| (*n as u64).try_into().ok())
+        } else {
+            filtered
+                .split_whitespace()
+                .find_map(|token| dict_entry.number_nd_tokens.get(token))
+                .and_then(|n| (*n as u64).try_into().ok())
+        }
+    } else {
+        let num = digits.parse::<u64>().ok()?;
+
+        let lookup_token = |token: &str| match token {
+            "k" => Some(3),
+            _ => dict_entry.number_tokens.get(token).map(|t| *t as i32),
+        };
+
+        if by_char {
+            exp += filtered
+                .chars()
+                .filter_map(|token| lookup_token(&token.to_string()))
+                .sum::<i32>();
+        } else {
+            exp += filtered
+                .split_whitespace()
+                .filter_map(lookup_token)
+                .sum::<i32>();
+        }
+
+        F::try_from(num.checked_mul((10_u64).checked_pow(exp.try_into().ok()?)?)?).ok()
+    }
+}
+
+/// Parse a large, textual number like [`parse_large_numstr`] and record a warning on failure.
+///
+/// If the string cannot be parsed, a message containing the offending string
+/// is appended to `warnings` and [`None`] is returned.
+pub fn parse_large_numstr_or_warn<F>(
+    string: &str,
+    lang: Language,
+    warnings: &mut Vec<String>,
+) -> Option<F>
+where
+    F: TryFrom<u64>,
+{
+    parse_large_numstr::<F>(string, lang).or_else(|| {
+        warnings.push(format!("could not parse numstr `{string}`"));
+        None
+    })
 }

 /// Replace all html control characters to make a string safe for inserting into HTML.
@ -448,23 +505,21 @@ pub(crate) mod tests {
        assert_eq!(res, expect);
    }

-    #[test]
-    fn t_parse_large_numstr_samples() {
-        let json_path = path!(*TESTFILES / "dict" / "large_number_samples.json");
-        let json_file = File::open(json_path).unwrap();
-        let number_samples: BTreeMap<Language, BTreeMap<u8, (String, u64)>> =
-            serde_json::from_reader(BufReader::new(json_file)).unwrap();
-
-        number_samples.iter().for_each(|(lang, entry)| {
-            entry.iter().for_each(|(_, (txt, expect))| {
-                testcase_parse_large_numstr(txt, *lang, *expect);
-            });
-        });
+    #[rstest]
+    #[case(
+        Language::Iw,
+        "\u{200f}\u{202b}3.36M\u{200f}\u{202c}\u{200f} \u{200f}מנויים\u{200f}",
+        3_360_000
+    )]
+    #[case(Language::As, "১ জন গ্ৰাহক", 1)]
+    fn t_parse_large_numstr(#[case] lang: Language, #[case] string: &str, #[case] expect: u64) {
+        let res = parse_large_numstr::<u64>(string, lang).unwrap();
+        assert_eq!(res, expect);
    }

    #[test]
-    fn t_parse_large_numstr_samples2() {
-        let json_path = path!(*TESTFILES / "dict" / "large_number_samples_all.json");
+    fn t_parse_large_numstr_samples() {
+        let json_path = path!(*TESTFILES / "dict" / "large_number_samples.json");
        let json_file = File::open(json_path).unwrap();
        let number_samples: BTreeMap<Language, BTreeMap<String, u64>> =
            serde_json::from_reader(BufReader::new(json_file)).unwrap();
@ -481,12 +536,18 @@ pub(crate) mod tests {
        // in the string.
        let rounded = {
            let n_significant_d = string.chars().filter(char::is_ascii_digit).count();
-            let mag = (expect as f64).log10().floor();
-            let factor = 10_u64.pow(1 + mag as u32 - n_significant_d as u32);
-            (((expect as f64) / factor as f64).floor() as u64) * factor
+            if n_significant_d == 0 {
+                expect
+            } else {
+                let mag = (expect as f64).log10().floor();
+                let factor = 10_u64.pow(1 + mag as u32 - n_significant_d as u32);
+                (((expect as f64) / factor as f64).floor() as u64) * factor
+            }
        };

-        let res = parse_large_numstr::<u64>(string, lang).expect(string);
-        assert_eq!(res, rounded, "{string} (lang: {lang}, exact: {expect})");
+        let emsg = format!("{string} (lang: {lang}, exact: {expect})");
+
+        let res = parse_large_numstr::<u64>(string, lang).expect(&emsg);
+        assert_eq!(res, rounded, "{emsg}");
    }
 }
--- a/src/util/timeago.rs
+++ b/src/util/timeago.rs
@ -0,0 +1,808 @@
+//! Parser for textual dates and times.
+//!
+//! The YouTube API mostly outputs pre-formatted dates and times
+//! like "18 minutes ago" or "Jul 2, 2014" instead of standardized
+//! machine-readable date and time formats.
+//!
+//! Additionally these formats are localized, meaning they depend
+//! on the configured language.
+//!
+//! This module can parse these dates using an embedded dictionary which
+//! contains date/time unit tokens for all supported languages.
+
+use std::ops::Mul;
+
+use serde::{Deserialize, Serialize};
+use time::{Date, Duration, Month, OffsetDateTime};
+
+use crate::{
+    param::Language,
+    util::{self, dictionary},
+};
+
+/// Parsed relative time expression, consisting of an amount and a time unit.
+///
+/// Example: "14 hours ago" => `TimeAgo {n: 14, unit: TimeUnit::Hour}`
+#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct TimeAgo {
+    /// Number of time units (limited to the `u8` range, 0..=255)
+    pub n: u8,
+    /// Time unit the amount is expressed in
+    pub unit: TimeUnit,
+}
+
+/// Parsed date string that may be relative or absolute.
+///
+/// Examples:
+///
+/// - "Jul 2, 2014" => `ParsedDate::Absolute(2014-07-02)`
+/// - "2 months ago" => `ParsedDate::Relative(TimeAgo {n: 2, unit: TimeUnit::Month})`
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum ParsedDate {
+    /// Absolute calendar date (without a time of day)
+    ///
+    /// Example: "Jul 2, 2014"
+    Absolute(Date),
+    /// Relative date, given as an amount of time units
+    ///
+    /// Example: "2 months ago"
+    Relative(TimeAgo),
+}
+
+/// Time unit of a parsed date/duration, declared from shortest (`Second`) to longest (`Year`)
+#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[serde(rename_all = "lowercase")]
+#[allow(missing_docs)]
+pub enum TimeUnit {
+    Second,
+    Minute,
+    Hour,
+    Day,
+    Week,
+    Month,
+    Year,
+}
+
+/// Value of a parsed TimeAgo token, used in the dictionary
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub struct TaToken {
+    /// Amount carried by the token
+    pub n: u8,
+    /// Time unit of the token. `None` marks a pure quantity token: the parser
+    /// remembers its `n` and multiplies it into the following unit token.
+    pub unit: Option<TimeUnit>,
+}
+
+/// Component of an absolute date.
+///
+/// The parser reads a sequence of these from the dictionary entry's `date_order`
+/// to decide which number of a textual date is the year, the month and the day.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum DateCmp {
+    /// Year
+    Y,
+    /// Month
+    M,
+    /// Day
+    D,
+}
+
+impl TimeUnit {
+    /// Length of one unit in seconds.
+    ///
+    /// Months and years are approximated with 30 and 365 days respectively.
+    pub fn secs(&self) -> i64 {
+        const MINUTE: i64 = 60;
+        const HOUR: i64 = 60 * MINUTE;
+        const DAY: i64 = 24 * HOUR;
+
+        match self {
+            Self::Second => 1,
+            Self::Minute => MINUTE,
+            Self::Hour => HOUR,
+            Self::Day => DAY,
+            Self::Week => 7 * DAY,
+            Self::Month => 30 * DAY,
+            Self::Year => 365 * DAY,
+        }
+    }
+}
+
+impl TimeAgo {
+    /// Total length of the time span in seconds (amount times unit length).
+    fn secs(&self) -> i64 {
+        let amount = i64::from(self.n);
+        amount * self.unit.secs()
+    }
+}
+
+impl Mul<u8> for TimeAgo {
+    type Output = Self;
+
+    /// Scale the amount by `rhs`, keeping the unit.
+    ///
+    /// The amount saturates at `u8::MAX` instead of overflowing, so oversized
+    /// quantities in parsed text can neither panic (builds with overflow checks)
+    /// nor wrap around to a small, wrong value (builds without them).
+    fn mul(self, rhs: u8) -> Self::Output {
+        TimeAgo {
+            n: self.n.saturating_mul(rhs),
+            unit: self.unit,
+        }
+    }
+}
+
+impl From<TimeAgo> for Duration {
+    /// Convert into a fixed-length duration of `ta.secs()` seconds.
+    fn from(ta: TimeAgo) -> Self {
+        let seconds = ta.secs();
+        Self::seconds(seconds)
+    }
+}
+
+impl From<TimeAgo> for OffsetDateTime {
+    /// Resolve the relative time against the timestamp returned by `util::now_sec`.
+    ///
+    /// Months and years are shifted on the calendar date; all other units
+    /// are subtracted as a fixed number of seconds.
+    fn from(ta: TimeAgo) -> Self {
+        let now = util::now_sec();
+        let amount = i32::from(ta.n);
+
+        match ta.unit {
+            TimeUnit::Month => now.replace_date(util::shift_months(now.date(), -amount)),
+            TimeUnit::Year => now.replace_date(util::shift_years(now.date(), -amount)),
+            TimeUnit::Second
+            | TimeUnit::Minute
+            | TimeUnit::Hour
+            | TimeUnit::Day
+            | TimeUnit::Week => now - Duration::from(ta),
+        }
+    }
+}
+
+impl From<ParsedDate> for OffsetDateTime {
+    /// Absolute dates become midnight UTC of that day; relative dates are
+    /// resolved through the [`TimeAgo`] conversion.
+    fn from(date: ParsedDate) -> Self {
+        match date {
+            ParsedDate::Absolute(day) => day.midnight().assume_utc(),
+            ParsedDate::Relative(timeago) => OffsetDateTime::from(timeago),
+        }
+    }
+}
+
+/// Normalize a textual date for the token lookup.
+///
+/// The text is lowercased, zero-width spaces and ASCII digits are removed
+/// and hyphens are replaced with spaces.
+fn filter_str(string: &str) -> String {
+    let lowered = string.to_lowercase();
+    let mut filtered = String::new();
+
+    for c in lowered.chars() {
+        match c {
+            '\u{200b}' => {}
+            '-' => filtered.push(' '),
+            digit if digit.is_ascii_digit() => {}
+            other => filtered.push(other),
+        }
+    }
+
+    filtered
+}
+
+/// Find the first time unit token in `filtered_str`.
+///
+/// - `by_char`: look up every character instead of every whitespace-separated word
+/// - `nd`: use the dictionary's `timeago_nd_tokens` instead of its `timeago_tokens`
+///
+/// Tokens without a unit act as multipliers: their amount is remembered and
+/// multiplied into the amount of the unit token that follows.
+/// Returns [`None`] if no unit token is found.
+fn parse_ta_token(
+    entry: &dictionary::Entry,
+    by_char: bool,
+    nd: bool,
+    filtered_str: &str,
+) -> Option<TimeAgo> {
+    let tokens = if nd {
+        &entry.timeago_nd_tokens
+    } else {
+        &entry.timeago_tokens
+    };
+    let mut multiplier = 1;
+
+    if by_char {
+        for c in filtered_str.chars() {
+            if let Some(t) = tokens.get(&c.to_string()) {
+                match t.unit {
+                    Some(unit) => {
+                        return Some(TimeAgo {
+                            n: t.n * multiplier,
+                            unit,
+                        })
+                    }
+                    None => multiplier = t.n,
+                }
+            }
+        }
+    } else {
+        for word in filtered_str.split_whitespace() {
+            if let Some(t) = tokens.get(word) {
+                match t.unit {
+                    Some(unit) => {
+                        return Some(TimeAgo {
+                            n: t.n * multiplier,
+                            unit,
+                        })
+                    }
+                    None => multiplier = t.n,
+                }
+            }
+        }
+    }
+
+    None
+}
+
+/// Collect all time unit tokens in `filtered_str`, in order of appearance.
+///
+/// - `by_char`: look up every character instead of every whitespace-separated word
+/// - `nd`: use the dictionary's `timeago_nd_tokens` instead of its `timeago_tokens`
+///
+/// Tokens without a unit act as multipliers: their amount is remembered and
+/// multiplied into the amount of every unit token that follows.
+fn parse_ta_tokens(
+    entry: &dictionary::Entry,
+    by_char: bool,
+    nd: bool,
+    filtered_str: &str,
+) -> Vec<TimeAgo> {
+    let tokens = if nd {
+        &entry.timeago_nd_tokens
+    } else {
+        &entry.timeago_tokens
+    };
+    let mut multiplier = 1;
+    let mut found = Vec::new();
+
+    if by_char {
+        for c in filtered_str.chars() {
+            if let Some(t) = tokens.get(&c.to_string()) {
+                match t.unit {
+                    Some(unit) => found.push(TimeAgo {
+                        n: t.n * multiplier,
+                        unit,
+                    }),
+                    None => multiplier = t.n,
+                }
+            }
+        }
+    } else {
+        for word in filtered_str.split_whitespace() {
+            if let Some(t) = tokens.get(word) {
+                match t.unit {
+                    Some(unit) => found.push(TimeAgo {
+                        n: t.n * multiplier,
+                        unit,
+                    }),
+                    None => multiplier = t.n,
+                }
+            }
+        }
+    }
+
+    found
+}
+
+/// Look up the first word of `filtered_str` that is a month name in the
+/// dictionary entry and return the value stored for it.
+fn parse_textual_month(entry: &dictionary::Entry, filtered_str: &str) -> Option<u8> {
+    for word in filtered_str.split_whitespace() {
+        if let Some(month) = entry.months.get(word) {
+            return Some(*month);
+        }
+    }
+    None
+}
+
+/// Parse a TimeAgo string (e.g. "29 minutes ago") into a TimeAgo object.
+///
+/// The amount is the number found in the text (1 if there is none), multiplied
+/// with the amount of the matched unit token.
+///
+/// Returns [`None`] if the date could not be parsed.
+pub fn parse_timeago(lang: Language, textual_date: &str) -> Option<TimeAgo> {
+    let entry = dictionary::entry(lang);
+    let filtered_str = filter_str(textual_date);
+    let quantity: u8 = util::parse_numeric(textual_date).unwrap_or(1);
+
+    let unit_token = parse_ta_token(&entry, util::lang_by_char(lang), false, &filtered_str)?;
+    Some(unit_token * quantity)
+}
+
+/// Parse a TimeAgo string (e.g. "29 minutes ago") into an [`OffsetDateTime`].
+///
+/// Returns [`None`] if the date could not be parsed.
+pub fn parse_timeago_dt(lang: Language, textual_date: &str) -> Option<OffsetDateTime> {
+    let timeago = parse_timeago(lang, textual_date)?;
+    Some(OffsetDateTime::from(timeago))
+}
+
+/// Parse a TimeAgo string like [`parse_timeago_dt`] and record a warning on failure.
+///
+/// If the text cannot be parsed, a message containing it is appended to
+/// `warnings` and [`None`] is returned.
+pub fn parse_timeago_dt_or_warn(
+    lang: Language,
+    textual_date: &str,
+    warnings: &mut Vec<String>,
+) -> Option<OffsetDateTime> {
+    parse_timeago_dt(lang, textual_date).or_else(|| {
+        warnings.push(format!("could not parse timeago `{textual_date}`"));
+        None
+    })
+}
+
+/// Parse a textual date (e.g. "29 minutes ago" or "Jul 2, 2014") into a ParsedDate object.
+///
+/// The interpretation depends on how many numbers the text contains:
+///
+/// - none: relative date without digits; the dictionary's `timeago_nd_tokens`
+///   are tried first, then the regular `timeago_tokens`
+/// - one: relative date with an amount (e.g. "2 days ago")
+/// - two or three: absolute date whose numbers are assigned to year/month/day
+///   according to the `date_order` of the language; a missing month is looked
+///   up by name
+///
+/// Returns [`None`] if the date could not be parsed. This includes amounts,
+/// months and days outside the `u8` range, which are rejected instead of being
+/// silently truncated to a different value.
+pub fn parse_textual_date(lang: Language, textual_date: &str) -> Option<ParsedDate> {
+    let entry = dictionary::entry(lang);
+    let by_char = util::lang_by_char(lang);
+    let filtered_str = filter_str(textual_date);
+
+    let nums = util::parse_numeric_vec::<u16>(textual_date);
+
+    match nums.len() {
+        0 => parse_ta_token(&entry, by_char, true, &filtered_str)
+            .or_else(|| parse_ta_token(&entry, by_char, false, &filtered_str))
+            .map(ParsedDate::Relative),
+        1 => {
+            // `TimeAgo` stores its amount as `u8`: refuse anything larger
+            let amount = u8::try_from(nums[0]).ok()?;
+            let timeago = parse_ta_token(&entry, by_char, false, &filtered_str)?;
+
+            Some(ParsedDate::Relative(TimeAgo {
+                n: timeago.n.checked_mul(amount)?,
+                unit: timeago.unit,
+            }))
+        }
+        2..=3 => {
+            if nums.len() != entry.date_order.len() {
+                return None;
+            }
+
+            let mut y: Option<u16> = None;
+            let mut m: Option<u16> = None;
+            let mut d: Option<u16> = None;
+
+            // Indexing is in bounds: both sequences have the same length (checked above)
+            for (i, n) in nums.iter().enumerate() {
+                match entry.date_order[i] {
+                    DateCmp::Y => y = Some(*n),
+                    DateCmp::M => m = Some(*n),
+                    DateCmp::D => d = Some(*n),
+                }
+            }
+
+            // Chinese/Japanese dont use textual months
+            if m.is_none() && !by_char {
+                m = parse_textual_month(&entry, &filtered_str).map(u16::from);
+            }
+
+            let month = Month::try_from(u8::try_from(m?).ok()?).ok()?;
+            let day = u8::try_from(d?).ok()?;
+
+            Date::from_calendar_date(i32::from(y?), month, day)
+                .ok()
+                .map(ParsedDate::Absolute)
+        }
+        _ => None,
+    }
+}
+
+/// Parse a textual date (e.g. "29 minutes ago" or "Jul 2, 2014") into an [`OffsetDateTime`].
+///
+/// Returns [`None`] if the date could not be parsed.
+pub fn parse_textual_date_to_dt(lang: Language, textual_date: &str) -> Option<OffsetDateTime> {
+    let parsed = parse_textual_date(lang, textual_date)?;
+    Some(OffsetDateTime::from(parsed))
+}
+
+/// Parse a textual date like [`parse_textual_date_to_dt`] and record a warning on failure.
+///
+/// If the text cannot be parsed, a message containing it is appended to
+/// `warnings` and [`None`] is returned.
+pub fn parse_textual_date_or_warn(
+    lang: Language,
+    textual_date: &str,
+    warnings: &mut Vec<String>,
+) -> Option<OffsetDateTime> {
+    parse_textual_date_to_dt(lang, textual_date).or_else(|| {
+        warnings.push(format!("could not parse textual date `{textual_date}`"));
+        None
+    })
+}
+
+/// Parse a textual video duration (e.g. "11 minutes, 20 seconds") into seconds.
+///
+/// The text is split into segments of a number and the words belonging to it.
+/// A segment without a number counts as 1. If a segment contains several unit
+/// tokens, the number only applies to the first one.
+///
+/// Returns None if the duration could not be parsed: a segment has no known
+/// unit token, a number is not a valid `u32`, or the total does not fit into
+/// a `u32` (checked arithmetic, so oversized input cannot panic or wrap).
+pub fn parse_video_duration(lang: Language, video_duration: &str) -> Option<u32> {
+    let entry = dictionary::entry(lang);
+    let by_char = util::lang_by_char(lang);
+
+    // Si/Sw: the unit word is written before its number (e.g. "dakika 1 na sekunde 13")
+    let parts = split_duration_txt(video_duration, matches!(lang, Language::Si | Language::Sw));
+    let mut secs: u32 = 0;
+
+    for part in parts {
+        let mut n = if part.digits.is_empty() {
+            1
+        } else {
+            part.digits.parse::<u32>().ok()?
+        };
+        let tokens = parse_ta_tokens(&entry, by_char, false, &part.word);
+        if tokens.is_empty() {
+            return None;
+        }
+
+        for ta in &tokens {
+            let unit_secs = u32::try_from(ta.secs()).ok()?;
+            secs = secs.checked_add(n.checked_mul(unit_secs)?)?;
+            // The number only belongs to the first unit of the segment
+            n = 1;
+        }
+    }
+
+    Some(secs)
+}
+
+/// Parse a textual video duration like [`parse_video_duration`] and record a warning on failure.
+///
+/// If the text cannot be parsed, a message containing it is appended to
+/// `warnings` and [`None`] is returned.
+pub fn parse_video_duration_or_warn(
+    lang: Language,
+    video_duration: &str,
+    warnings: &mut Vec<String>,
+) -> Option<u32> {
+    parse_video_duration(lang, video_duration).or_else(|| {
+        warnings.push(format!("could not parse video duration `{video_duration}`"));
+        None
+    })
+}
+
+/// One segment of a textual video duration: a number and the words belonging to it
+#[derive(Default)]
+struct DurationTxtSegment {
+    /// ASCII digits of the segment (empty if the segment has no number)
+    digits: String,
+    /// Lowercased non-digit text of the segment, with commas removed
+    word: String,
+}
+
+/// Split a textual duration into segments of digits and accompanying words.
+///
+/// `start_c` selects the layout of a segment: with `false` a segment is a
+/// number followed by its words ("11 minutes"), with `true` it is the words
+/// followed by their number ("dakika 1"). Leading text that does not fit the
+/// layout becomes a segment of its own. Words are lowercased, commas dropped.
+fn split_duration_txt(txt: &str, start_c: bool) -> Vec<DurationTxtSegment> {
+    // Kind of the previously consumed character
+    #[derive(PartialEq)]
+    enum Prev {
+        Nothing,
+        Digit,
+        Word,
+    }
+
+    let mut segments = Vec::new();
+    let mut prev = Prev::Nothing;
+    let mut current = DurationTxtSegment::default();
+
+    for c in txt.chars() {
+        let is_digit = c.is_ascii_digit();
+
+        // A segment ends when the character kind changes and the segment is complete
+        let at_boundary = if is_digit {
+            prev == Prev::Word
+                && (!current.digits.is_empty() || (!start_c && segments.is_empty()))
+        } else {
+            prev == Prev::Digit && (!current.word.is_empty() || (start_c && segments.is_empty()))
+        };
+        if at_boundary {
+            segments.push(std::mem::take(&mut current));
+        }
+
+        if is_digit {
+            current.digits.push(c);
+            prev = Prev::Digit;
+        } else {
+            if c != ',' {
+                current.word.extend(c.to_lowercase());
+            }
+            prev = Prev::Word;
+        }
+    }
+
+    if !(current.word.is_empty() && current.digits.is_empty()) {
+        segments.push(current);
+    }
+
+    segments
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{collections::BTreeMap, fs::File, io::BufReader};
+
+    use path_macro::path;
+    use rstest::rstest;
+    use time::macros::{date, datetime};
+
+    use super::*;
+    use crate::util::tests::TESTFILES;
+
+    /// Relative dates in individual languages, including one with a no-break space
+    #[rstest]
+    #[case(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
+    #[case(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
+    // No-break space
+    #[case(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
+    fn t_parse(
+        #[case] lang: Language,
+        #[case] textual_date: &str,
+        #[case] expect: Option<TimeAgo>,
+    ) {
+        assert_eq!(parse_timeago(lang, textual_date), expect);
+    }
+
+    /// Every language in `timeago_samples.json` must yield the same sequence of values
+    #[test]
+    fn t_testfile() {
+        let json_path = path!(*TESTFILES / "dict" / "timeago_samples.json");
+
+        // Expected (amount, unit) for each sample, in file order
+        let expect: Vec<TimeAgo> = [
+            (10, TimeUnit::Minute),
+            (20, TimeUnit::Minute),
+            (1, TimeUnit::Hour),
+            (2, TimeUnit::Hour),
+            (7, TimeUnit::Hour),
+            (8, TimeUnit::Hour),
+            (9, TimeUnit::Hour),
+            (10, TimeUnit::Hour),
+            (11, TimeUnit::Hour),
+            (12, TimeUnit::Hour),
+            (13, TimeUnit::Hour),
+            (14, TimeUnit::Hour),
+            (15, TimeUnit::Hour),
+            (3, TimeUnit::Hour),
+            (4, TimeUnit::Hour),
+            (4, TimeUnit::Hour),
+            (5, TimeUnit::Hour),
+            (6, TimeUnit::Hour),
+            (6, TimeUnit::Hour),
+            (20, TimeUnit::Hour),
+            (2, TimeUnit::Day),
+            (3, TimeUnit::Day),
+            (5, TimeUnit::Day),
+            (6, TimeUnit::Day),
+            (8, TimeUnit::Day),
+            (10, TimeUnit::Day),
+            (12, TimeUnit::Day),
+            (2, TimeUnit::Week),
+            (3, TimeUnit::Week),
+            (4, TimeUnit::Week),
+            (1, TimeUnit::Month),
+            (8, TimeUnit::Month),
+            (11, TimeUnit::Month),
+            (1, TimeUnit::Year),
+            (2, TimeUnit::Year),
+            (3, TimeUnit::Year),
+            (4, TimeUnit::Year),
+        ]
+        .iter()
+        .map(|&(n, unit)| TimeAgo { n, unit })
+        .collect();
+
+        let json_file = File::open(json_path).unwrap();
+        let strings_map: BTreeMap<Language, Vec<String>> =
+            serde_json::from_reader(BufReader::new(json_file)).unwrap();
+
+        for (lang, strings) in &strings_map {
+            assert_eq!(strings.len(), expect.len());
+
+            for (n, s) in strings.iter().enumerate() {
+                assert_eq!(
+                    parse_timeago(*lang, s),
+                    Some(expect[n]),
+                    "Language: {lang}, n: {n}"
+                );
+            }
+        }
+    }
+
+    /// Check all cases of `timeago_table.json` (language -> time unit -> text -> amount)
+    #[test]
+    fn t_timeago_table() {
+        #[derive(Debug, Clone, Deserialize)]
+        struct TimeagoTable {
+            entries: BTreeMap<Language, BTreeMap<TimeUnit, TimeagoTableEntry>>,
+        }
+
+        #[derive(Debug, Clone, Deserialize)]
+        struct TimeagoTableEntry {
+            cases: BTreeMap<String, u8>,
+        }
+
+        let json_path = path!(*TESTFILES / "dict" / "timeago_table.json");
+        let reader = BufReader::new(File::open(json_path).unwrap());
+        let table: TimeagoTable = serde_json::from_reader(reader).unwrap();
+
+        let mut checked = 0;
+        for (lang, units) in &table.entries {
+            for (unit, entry) in units {
+                for (txt, n) in &entry.cases {
+                    assert_eq!(
+                        parse_timeago(*lang, txt),
+                        Some(TimeAgo { n: *n, unit: *unit }),
+                        "lang: {lang}, txt: {txt}"
+                    );
+                    checked += 1;
+                }
+            }
+        }
+
+        // The table is expected to contain exactly this many cases
+        assert_eq!(checked, 1065)
+    }
+
+    /// Relative dates (with and without digits) and absolute dates
+    #[rstest]
+    #[case(Language::En, "Updated today", Some(ParsedDate::Relative(TimeAgo { n: 0, unit: TimeUnit::Day })))]
+    #[case(Language::En, "Updated yesterday", Some(ParsedDate::Relative(TimeAgo { n: 1, unit: TimeUnit::Day })))]
+    #[case(Language::En, "Updated 2 days ago", Some(ParsedDate::Relative(TimeAgo { n: 2, unit: TimeUnit::Day })))]
+    #[case(Language::Si, "ඊයේ යාවත්කාලීන කරන ලදී", Some(ParsedDate::Relative(TimeAgo { n: 1, unit: TimeUnit::Day })))]
+    #[case(
+        Language::En,
+        "Last updated on Jun 04, 2003",
+        Some(ParsedDate::Absolute(date!(2003-6-4)))
+    )]
+    #[case(
+        Language::Bn,
+        "যোগ দিয়েছেন 24 সেপ, 2013",
+        Some(ParsedDate::Absolute(date!(2013-9-24)))
+    )]
+    fn t_parse_date(
+        #[case] lang: Language,
+        #[case] textual_date: &str,
+        #[case] expect: Option<ParsedDate>,
+    ) {
+        assert_eq!(parse_textual_date(lang, textual_date), expect);
+    }
+
+    /// Every language in `playlist_samples.json` must yield the same dates for the same sample keys
+    #[test]
+    fn t_parse_date_samples() {
+        let json_path = path!(*TESTFILES / "dict" / "playlist_samples.json");
+        let json_file = File::open(json_path).unwrap();
+        let date_samples: BTreeMap<Language, BTreeMap<String, String>> =
+            serde_json::from_reader(BufReader::new(json_file)).unwrap();
+
+        let days_ago = |n| {
+            ParsedDate::Relative(TimeAgo {
+                n,
+                unit: TimeUnit::Day,
+            })
+        };
+
+        // Sample key -> expected result, checked in this order
+        let expect = [
+            ("Today", days_ago(0)),
+            ("Yesterday", days_ago(1)),
+            ("Ago", days_ago(5)),
+            ("Jan", ParsedDate::Absolute(date!(2020 - 1 - 3))),
+            ("Feb", ParsedDate::Absolute(date!(2016 - 2 - 7))),
+            ("Mar", ParsedDate::Absolute(date!(2015 - 3 - 9))),
+            ("Apr", ParsedDate::Absolute(date!(2017 - 4 - 2))),
+            ("May", ParsedDate::Absolute(date!(2014 - 5 - 22))),
+            ("Jun", ParsedDate::Absolute(date!(2014 - 6 - 28))),
+            ("Jul", ParsedDate::Absolute(date!(2014 - 7 - 2))),
+            ("Aug", ParsedDate::Absolute(date!(2015 - 8 - 23))),
+            ("Sep", ParsedDate::Absolute(date!(2018 - 9 - 16))),
+            ("Oct", ParsedDate::Absolute(date!(2014 - 10 - 31))),
+            ("Nov", ParsedDate::Absolute(date!(2016 - 11 - 3))),
+            ("Dec", ParsedDate::Absolute(date!(2021 - 12 - 24))),
+        ];
+
+        for (lang, samples) in &date_samples {
+            for (key, expected) in &expect {
+                assert_eq!(
+                    parse_textual_date(*lang, samples.get(*key).unwrap()),
+                    Some(*expected),
+                    "lang: {lang}"
+                );
+            }
+        }
+    }
+
+    /// Check all samples of `video_duration_samples.json` (language -> text -> seconds)
+    #[test]
+    fn t_parse_video_duration() {
+        let json_path = path!(*TESTFILES / "dict" / "video_duration_samples.json");
+        let reader = BufReader::new(File::open(json_path).unwrap());
+        let duration_samples: BTreeMap<Language, BTreeMap<String, u32>> =
+            serde_json::from_reader(reader).unwrap();
+
+        for (lang, samples) in &duration_samples {
+            for (txt, duration) in samples {
+                assert_eq!(
+                    parse_video_duration(*lang, txt),
+                    Some(*duration),
+                    "lang: {lang}; txt: `{txt}`"
+                );
+            }
+        }
+    }
+
+    /// Durations with digit-less units and with the unit written before the number
+    #[rstest]
+    #[case(Language::Ar, "19 دقيقة وثانيتان", 1142)]
+    #[case(Language::Ar, "دقيقة و13 ثانية", 73)]
+    #[case(Language::Sw, "dakika 1 na sekunde 13", 73)]
+    fn t_parse_video_duration2(
+        #[case] lang: Language,
+        #[case] video_duration: &str,
+        #[case] expect: u32,
+    ) {
+        let parsed = parse_video_duration(lang, video_duration);
+        assert_eq!(parsed, Some(expect));
+    }
+
+    /// Conversion of absolute and relative dates into `OffsetDateTime`
+    #[test]
+    fn t_to_datetime() {
+        // Absolute date
+        let absolute = parse_textual_date_to_dt(Language::En, "Last updated on Jan 3, 2020");
+        assert_eq!(absolute, Some(datetime!(2020-1-3 0:00 +0)));
+
+        // Relative date
+        let relative = parse_textual_date_to_dt(Language::En, "1 year ago").unwrap();
+        let current_year = OffsetDateTime::now_utc().year();
+        assert_eq!(relative.year(), current_year - 1);
+    }
+}