This repository has been archived on 2026-05-27. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
rustypipe/src/util/timeago.rs
2023-11-15 01:03:03 +01:00

989 lines
29 KiB
Rust

//! Parser for textual dates and times.
//!
//! The YouTube API mostly outputs pre-formatted dates and times
//! like "18 minutes ago" or "Jul 2, 2014" instead of standardized
//! machine-readable date and time formats.
//!
//! Additionally these formats are localized, meaning they depend
//! on the configured language.
//!
//! This module can parse these dates using an embedded dictionary which
//! contains date/time unit tokens for all supported languages.
use std::ops::Mul;
use serde::{Deserialize, Serialize};
use time::{Date, Duration, Month, OffsetDateTime};
use crate::{
param::Language,
util::{self, dictionary, SplitTokens},
};
/// Parsed TimeAgo string, contains amount and time unit.
///
/// Example: "14 hours ago" => `TimeAgo {n: 14, unit: TimeUnit::Hour}`
///
/// The amount is stored as a `u8`, so a single value can describe
/// at most 255 units.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TimeAgo {
/// Number of time units
pub n: u8,
/// Time unit
pub unit: TimeUnit,
}
/// Parsed date string that may be relative or absolute.
///
/// Examples:
///
/// - "Jul 2, 2014" => `ParsedDate::Absolute(date)` where `date` is the calendar date 2014-07-02
/// - "2 months ago" => `ParsedDate::Relative(TimeAgo {n: 2, unit: TimeUnit::Month})`
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ParsedDate {
/// Absolute date
///
/// Example: "Jul 2, 2014"
Absolute(Date),
/// Relative date
///
/// Example: "2 months ago"
Relative(TimeAgo),
}
/// Parsed time unit
///
/// The variants are declared from the shortest to the longest unit, so the
/// derived ordering puts `Second` lowest and `Year` highest. With serde the
/// variants are (de)serialized under their lowercase names (e.g. `"minute"`).
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "lowercase")]
#[allow(missing_docs)]
pub enum TimeUnit {
Second,
Minute,
Hour,
Day,
Week,
Month,
Year,
}
/// Value of a parsed TimeAgo token, used in the dictionary
///
/// A token either names a time unit (`unit` is `Some`) or only carries a
/// quantity (`unit` is `None`). The token parser in this module multiplies
/// the most recent quantity-only token into the next unit token it finds.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct TaToken {
/// Amount carried by the token
pub n: u8,
/// Time unit named by the token, `None` for quantity-only tokens
pub unit: Option<TimeUnit>,
}
/// Component of an absolute calendar date.
///
/// The dictionary's `date_order` is a sequence of these values;
/// `parse_textual_date` uses it to decide which of the numbers found in a
/// textual date is the year, the month and the day.
pub enum DateCmp {
/// Year
Y,
/// Month
M,
/// Day
D,
}
impl TimeUnit {
pub fn secs(self) -> u32 {
match self {
TimeUnit::Second => 1,
TimeUnit::Minute => 60,
TimeUnit::Hour => 3600,
TimeUnit::Day => 24 * 3600,
TimeUnit::Week => 7 * 24 * 3600,
TimeUnit::Month => 30 * 24 * 3600,
TimeUnit::Year => 365 * 24 * 3600,
}
}
}
impl TimeAgo {
    /// Total length of the time span in seconds (`n` times the unit length).
    ///
    /// The product does not always fit into a `u32`: 137 or more years exceed
    /// `u32::MAX` seconds. Instead of panicking (debug builds) or wrapping
    /// around (release builds), the result saturates at `u32::MAX`.
    fn secs(self) -> u32 {
        u32::from(self.n).saturating_mul(self.unit.secs())
    }
}
/// Scale the amount of a [`TimeAgo`] by a factor, keeping its unit.
///
/// The amount is a `u8`, so the product can exceed its range (e.g. a token
/// worth 2 units combined with a parsed quantity of 200). The multiplication
/// saturates at `u8::MAX` instead of panicking in debug builds or silently
/// wrapping around in release builds.
impl Mul<u8> for TimeAgo {
    type Output = Self;

    fn mul(self, rhs: u8) -> Self::Output {
        TimeAgo {
            n: self.n.saturating_mul(rhs),
            unit: self.unit,
        }
    }
}
/// Convert a [`TimeAgo`] into a fixed-length [`Duration`].
///
/// Months and years use the approximate unit lengths from
/// [`TimeUnit::secs`] (30 and 365 days).
impl From<TimeAgo> for Duration {
    fn from(ta: TimeAgo) -> Self {
        // Multiply in i64: the largest possible span (255 years) does not fit
        // into a u32 number of seconds, but always fits into an i64.
        Duration::seconds(i64::from(ta.n) * i64::from(ta.unit.secs()))
    }
}
/// Resolve a [`TimeAgo`] against the current time (`util::now_sec`).
///
/// Months and years are subtracted on the calendar via `util::shift_months`
/// and `util::shift_years`, keeping the time of day. All shorter units are
/// subtracted as a fixed-length [`Duration`].
impl From<TimeAgo> for OffsetDateTime {
    fn from(ta: TimeAgo) -> Self {
        let now = util::now_sec();
        match ta.unit {
            TimeUnit::Month => {
                let shifted = util::shift_months(now.date(), -i32::from(ta.n));
                now.replace_date(shifted)
            }
            TimeUnit::Year => {
                let shifted = util::shift_years(now.date(), -i32::from(ta.n));
                now.replace_date(shifted)
            }
            TimeUnit::Second
            | TimeUnit::Minute
            | TimeUnit::Hour
            | TimeUnit::Day
            | TimeUnit::Week => now - Duration::from(ta),
        }
    }
}
impl From<ParsedDate> for OffsetDateTime {
fn from(date: ParsedDate) -> Self {
match date {
ParsedDate::Absolute(date) => date.with_hms(0, 0, 0).unwrap().assume_utc(),
ParsedDate::Relative(timeago) => timeago.into(),
}
}
}
/// Normalize a date string before it is split into tokens.
///
/// The text is lowercased, ASCII digits, dots and zero-width spaces
/// (U+200B) are dropped and every hyphen is replaced with a space.
fn filter_datestr(string: &str) -> String {
    let lowered = string.to_lowercase();
    let mut cleaned = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        match ch {
            '\u{200b}' | '.' | '0'..='9' => {}
            '-' => cleaned.push(' '),
            other => cleaned.push(other),
        }
    }
    cleaned
}
/// Iterator that looks up the words of a pre-filtered date string in a
/// TimeAgo token dictionary and yields the [`TimeAgo`] values they describe.
struct TaTokenParser<'a> {
/// Words of the input string, produced by `SplitTokens`
iter: SplitTokens<'a>,
/// Token dictionary the words are looked up in
tokens: &'a phf::Map<&'static str, TaToken>,
}
impl<'a> TaTokenParser<'a> {
    /// Create a token parser for `filtered_str`.
    ///
    /// - `entry`: dictionary entry of the language to parse
    /// - `by_char`: forwarded to `SplitTokens::new` together with the string
    /// - `nd`: look words up in `timeago_nd_tokens` instead of
    ///   `timeago_tokens` (`parse_textual_date` tries this table first for
    ///   strings that contain no number)
    /// - `filtered_str`: date string, already passed through `filter_datestr`
    fn new(entry: &'a dictionary::Entry, by_char: bool, nd: bool, filtered_str: &'a str) -> Self {
        Self {
            iter: SplitTokens::new(filtered_str, by_char),
            tokens: if nd {
                &entry.timeago_nd_tokens
            } else {
                &entry.timeago_tokens
            },
        }
    }
}
impl<'a> Iterator for TaTokenParser<'a> {
    type Item = TimeAgo;

    /// Advance to the next word that names a time unit.
    ///
    /// Words missing from the dictionary are skipped. A quantity-only token
    /// (no unit) is remembered and multiplied into the next unit token found
    /// during the same call; the remembered quantity starts at 1 on each call.
    fn next(&mut self) -> Option<Self::Item> {
        let mut quantity = 1;
        for word in self.iter.by_ref() {
            if let Some(token) = self.tokens.get(word) {
                match token.unit {
                    Some(unit) => {
                        return Some(TimeAgo {
                            n: token.n * quantity,
                            unit,
                        })
                    }
                    None => quantity = token.n,
                }
            }
        }
        None
    }
}
/// Find the first whitespace-separated word of `filtered_str` that is listed
/// in the dictionary's month table and return the value stored for it.
///
/// Returns [`None`] if no word matches.
fn parse_textual_month(entry: &dictionary::Entry, filtered_str: &str) -> Option<u8> {
    for word in filtered_str.split_whitespace() {
        if let Some(month) = entry.months.get(word) {
            return Some(*month);
        }
    }
    None
}
/// Parse a TimeAgo string (e.g. "29 minutes ago") into a TimeAgo object.
///
/// The unit comes from the first dictionary token found in the text, the
/// amount from `util::parse_numeric_prod` (1 if it yields nothing).
///
/// Returns [`None`] if the date could not be parsed.
pub fn parse_timeago(lang: Language, textual_date: &str) -> Option<TimeAgo> {
    let entry = dictionary::entry(lang);
    let filtered_str = filter_datestr(textual_date);
    let qu: u8 = util::parse_numeric_prod(textual_date).unwrap_or(1);

    // The French and Spanish locales listed here use a bare 'a' as a short
    // form of years. Since 'a' is also an ordinary word, it cannot be parsed
    // as a dictionary token and is handled up front.
    let bare_a_is_year = matches!(
        lang,
        Language::Fr | Language::FrCa | Language::Es | Language::Es419 | Language::EsUs
    );
    if bare_a_is_year && textual_date.ends_with(" a") {
        return Some(TimeAgo {
            n: qu,
            unit: TimeUnit::Year,
        });
    }

    let mut parser = TaTokenParser::new(&entry, util::lang_by_char(lang), false, &filtered_str);
    let first = parser.next()?;
    Some(first * qu)
}
/// Parse a TimeAgo string (e.g. "29 minutes ago") into an [`OffsetDateTime`],
/// resolved relative to the current time.
///
/// Returns [`None`] if the date could not be parsed.
pub fn parse_timeago_dt(lang: Language, textual_date: &str) -> Option<OffsetDateTime> {
    let timeago = parse_timeago(lang, textual_date)?;
    Some(OffsetDateTime::from(timeago))
}
/// Same as [`parse_timeago_dt`], but appends a message to `warnings` if the
/// string could not be parsed.
pub fn parse_timeago_dt_or_warn(
    lang: Language,
    textual_date: &str,
    warnings: &mut Vec<String>,
) -> Option<OffsetDateTime> {
    parse_timeago_dt(lang, textual_date).or_else(|| {
        warnings.push(format!("could not parse timeago `{textual_date}`"));
        None
    })
}
/// Parse a textual date (e.g. "29 minutes ago" or "Jul 2, 2014") into a ParsedDate object.
///
/// Returns [`None`] if the date could not be parsed.
pub fn parse_textual_date(lang: Language, textual_date: &str) -> Option<ParsedDate> {
let entry = dictionary::entry(lang);
let by_char = util::lang_by_char(lang);
let filtered_str = filter_datestr(textual_date);
let nums = util::parse_numeric_vec::<u16>(textual_date);
match nums.len() {
0 => match TaTokenParser::new(&entry, by_char, true, &filtered_str).next() {
Some(timeago) => Some(ParsedDate::Relative(timeago)),
None => TaTokenParser::new(&entry, by_char, false, &filtered_str)
.next()
.map(ParsedDate::Relative),
},
1 => TaTokenParser::new(&entry, by_char, false, &filtered_str)
.next()
.map(|timeago| ParsedDate::Relative(timeago * nums[0] as u8)),
2..=3 => {
if nums.len() == entry.date_order.len() {
let mut y: Option<u16> = None;
let mut m: Option<u16> = None;
let mut d: Option<u16> = None;
nums.iter()
.enumerate()
.for_each(|(i, n)| match entry.date_order[i] {
DateCmp::Y => y = Some(*n),
DateCmp::M => m = Some(*n),
DateCmp::D => d = Some(*n),
});
// Chinese/Japanese dont use textual months
if m.is_none() && !by_char {
m = parse_textual_month(&entry, &filtered_str).map(u16::from);
}
match (y, m, d) {
(Some(y), Some(m), Some(d)) => Month::try_from(m as u8)
.ok()
.and_then(|m| Date::from_calendar_date(y.into(), m, d as u8).ok())
.map(ParsedDate::Absolute),
_ => None,
}
} else {
None
}
}
_ => None,
}
}
/// Parse a textual date (e.g. "29 minutes ago" or "Jul 2, 2014") into an
/// [`OffsetDateTime`].
///
/// Returns [`None`] if the date could not be parsed.
pub fn parse_textual_date_to_dt(lang: Language, textual_date: &str) -> Option<OffsetDateTime> {
    let parsed = parse_textual_date(lang, textual_date)?;
    Some(OffsetDateTime::from(parsed))
}
/// Same as [`parse_textual_date_to_dt`], but appends a message to `warnings`
/// if the string could not be parsed.
pub fn parse_textual_date_or_warn(
    lang: Language,
    textual_date: &str,
    warnings: &mut Vec<String>,
) -> Option<OffsetDateTime> {
    parse_textual_date_to_dt(lang, textual_date).or_else(|| {
        warnings.push(format!("could not parse textual date `{textual_date}`"));
        None
    })
}
/// Parse a textual video duration (e.g. "11 minutes, 20 seconds")
///
/// The text is split into number/word segments. Within a segment the number
/// (1 if absent) applies to the first time unit token, any further unit
/// tokens of the same segment count with their own amount only.
///
/// Returns the duration in seconds, or [`None`] if the duration could not be
/// parsed: a segment without any time unit token, a number that is not a
/// valid `u32`, or a total that does not fit into a `u32`.
pub fn parse_video_duration(lang: Language, video_duration: &str) -> Option<u32> {
    let entry = dictionary::entry(lang);
    let by_char = util::lang_by_char(lang);
    // Swahili and Sinhala put the unit word in front of the number
    let parts = split_duration_txt(video_duration, matches!(lang, Language::Si | Language::Sw));

    let mut secs: u32 = 0;
    for part in parts {
        let mut n = if part.digits.is_empty() {
            1
        } else {
            part.digits.parse::<u32>().ok()?
        };

        let mut tokens = TaTokenParser::new(&entry, by_char, false, &part.word).peekable();
        // Every segment has to contain at least one time unit
        tokens.peek()?;

        for ta in tokens {
            // The digits come from external text, so all arithmetic is
            // checked; an overflow makes the duration unparseable
            let token_secs = u32::from(ta.n).checked_mul(ta.unit.secs())?;
            let segment_secs = n.checked_mul(token_secs)?;
            secs = secs.checked_add(segment_secs)?;
            n = 1;
        }
    }
    Some(secs)
}
/// Same as [`parse_video_duration`], but appends a message to `warnings` if
/// the duration could not be parsed.
pub fn parse_video_duration_or_warn(
    lang: Language,
    video_duration: &str,
    warnings: &mut Vec<String>,
) -> Option<u32> {
    parse_video_duration(lang, video_duration).or_else(|| {
        warnings.push(format!("could not parse video duration `{video_duration}`"));
        None
    })
}
/// One segment of a video duration text: a run of digits and the text
/// belonging to it (either of them may be empty).
#[derive(Default)]
struct DurationTxtSegment {
    /// ASCII digits of the segment
    digits: String,
    /// Lowercased text of the segment with `.` and `,` removed
    word: String,
}

/// Split a video duration string into its segments.
///
/// Each segment consists of a word and a string of digits (one of them may be empty).
/// Surrounding whitespace of the input is trimmed, whitespace inside of it is
/// kept as part of the words.
///
/// The `start_word` parameter determines whether the segments should start with a word
/// instead of a number. This is the case in Swahili and Singhalese.
///
/// Example (start_word=false):
/// - `1 minute, 13 seconds` -> `{1;minute} {13;seconds}`
/// - `foo 1 minute, 13 seconds bar` -> `{foo} {1;minute} {13;seconds bar}`
///
/// Example (start_word=true):
/// - `dakika 1 na sekunde 1` -> `{1;dakika} {1;na sekunde}`
/// - `foo dakika 1 na sekunde 1 bar` -> `{1;foo dakika} {1;na sekunde} {bar}`
fn split_duration_txt(txt: &str, start_word: bool) -> Vec<DurationTxtSegment> {
    /// Kind of the character consumed last
    #[derive(Clone, Copy, PartialEq, Eq)]
    enum Last {
        Nothing,
        Digit,
        Word,
    }

    let mut out = Vec::new();
    let mut current = DurationTxtSegment::default();
    let mut last = Last::Nothing;

    for ch in txt.trim().chars() {
        let is_digit = ch.is_ascii_digit();

        // A segment ends when the character kind changes and the part that is
        // about to start is already filled. The very first segment may also
        // end while it only holds its leading part.
        let segment_done = if is_digit {
            last == Last::Word && (!current.digits.is_empty() || (!start_word && out.is_empty()))
        } else {
            last == Last::Digit && (!current.word.is_empty() || (start_word && out.is_empty()))
        };
        if segment_done {
            out.push(std::mem::take(&mut current));
        }

        if is_digit {
            current.digits.push(ch);
            last = Last::Digit;
        } else {
            if ch != '.' && ch != ',' {
                current.word.extend(ch.to_lowercase());
            }
            last = Last::Word;
        }
    }

    if !(current.word.is_empty() && current.digits.is_empty()) {
        out.push(current);
    }
    out
}
#[cfg(test)]
mod tests {
use std::{collections::BTreeMap, fs::File, io::BufReader};
use path_macro::path;
use rstest::rstest;
use time::macros::{date, datetime};
use super::*;
use crate::util::tests::TESTFILES;
/// Single TimeAgo strings, including one separated by a no-break space.
#[rstest]
#[case::de(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
#[case::ar(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
// No-break space
#[case::nbsp(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
fn t_parse(
    #[case] lang: Language,
    #[case] textual_date: &str,
    #[case] expect: Option<TimeAgo>,
) {
    assert_eq!(parse_timeago(lang, textual_date), expect);
}
/// Parse the long-form TimeAgo samples of every language and compare them
/// with the expected values, which are the same for all languages.
#[test]
fn t_testfile() {
    let json_path = path!(*TESTFILES / "dict" / "timeago_samples.json");

    let ta = |n: u8, unit: TimeUnit| TimeAgo { n, unit };
    // Expected values in the order of the samples
    let expect = [
        ta(10, TimeUnit::Minute),
        ta(20, TimeUnit::Minute),
        ta(1, TimeUnit::Hour),
        ta(2, TimeUnit::Hour),
        ta(7, TimeUnit::Hour),
        ta(8, TimeUnit::Hour),
        ta(9, TimeUnit::Hour),
        ta(10, TimeUnit::Hour),
        ta(11, TimeUnit::Hour),
        ta(12, TimeUnit::Hour),
        ta(13, TimeUnit::Hour),
        ta(14, TimeUnit::Hour),
        ta(15, TimeUnit::Hour),
        ta(3, TimeUnit::Hour),
        ta(4, TimeUnit::Hour),
        ta(4, TimeUnit::Hour),
        ta(5, TimeUnit::Hour),
        ta(6, TimeUnit::Hour),
        ta(6, TimeUnit::Hour),
        ta(20, TimeUnit::Hour),
        ta(2, TimeUnit::Day),
        ta(3, TimeUnit::Day),
        ta(5, TimeUnit::Day),
        ta(6, TimeUnit::Day),
        ta(8, TimeUnit::Day),
        ta(10, TimeUnit::Day),
        ta(12, TimeUnit::Day),
        ta(2, TimeUnit::Week),
        ta(3, TimeUnit::Week),
        ta(4, TimeUnit::Week),
        ta(1, TimeUnit::Month),
        ta(8, TimeUnit::Month),
        ta(11, TimeUnit::Month),
        ta(1, TimeUnit::Year),
        ta(2, TimeUnit::Year),
        ta(3, TimeUnit::Year),
        ta(4, TimeUnit::Year),
    ];

    let json_file = File::open(json_path).unwrap();
    let strings_map: BTreeMap<Language, Vec<String>> =
        serde_json::from_reader(BufReader::new(json_file)).unwrap();

    for (lang, strings) in &strings_map {
        assert_eq!(strings.len(), expect.len());
        for (s, exp) in strings.iter().zip(expect.iter()) {
            assert_eq!(
                parse_timeago(*lang, s),
                Some(*exp),
                "Language: {lang}, txt: `{s}`"
            );
        }
    }
}
/// Parse the short-form TimeAgo samples of every language and compare them
/// with the expected values.
///
/// For Mongolian, week samples are expected as days (amount times 7).
#[test]
fn t_testfile_short() {
    let json_path = path!(*TESTFILES / "dict" / "timeago_samples_short.json");

    let ta = |n: u8, unit: TimeUnit| TimeAgo { n, unit };
    // Expected values in the order of the samples
    let expect = [
        ta(35, TimeUnit::Minute),
        ta(50, TimeUnit::Minute),
        ta(1, TimeUnit::Hour),
        ta(2, TimeUnit::Hour),
        ta(3, TimeUnit::Hour),
        ta(4, TimeUnit::Hour),
        ta(5, TimeUnit::Hour),
        ta(6, TimeUnit::Hour),
        ta(7, TimeUnit::Hour),
        ta(8, TimeUnit::Hour),
        ta(9, TimeUnit::Hour),
        ta(12, TimeUnit::Hour),
        ta(17, TimeUnit::Hour),
        ta(18, TimeUnit::Hour),
        ta(19, TimeUnit::Hour),
        ta(20, TimeUnit::Hour),
        ta(10, TimeUnit::Hour),
        ta(11, TimeUnit::Hour),
        ta(13, TimeUnit::Hour),
        ta(1, TimeUnit::Day),
        ta(2, TimeUnit::Day),
        ta(3, TimeUnit::Day),
        ta(4, TimeUnit::Day),
        ta(6, TimeUnit::Day),
        ta(8, TimeUnit::Day),
        ta(10, TimeUnit::Day),
        ta(11, TimeUnit::Day),
        ta(12, TimeUnit::Day),
        ta(13, TimeUnit::Day),
        ta(2, TimeUnit::Week),
        ta(3, TimeUnit::Week),
        ta(1, TimeUnit::Month),
        ta(4, TimeUnit::Week),
        ta(7, TimeUnit::Month),
        ta(10, TimeUnit::Month),
        ta(1, TimeUnit::Year),
        ta(2, TimeUnit::Year),
        ta(3, TimeUnit::Year),
        ta(4, TimeUnit::Year),
        ta(5, TimeUnit::Year),
    ];

    let json_file = File::open(json_path).unwrap();
    let strings_map: BTreeMap<Language, Vec<String>> =
        serde_json::from_reader(BufReader::new(json_file)).unwrap();

    for (lang, strings) in &strings_map {
        assert_eq!(strings.len(), expect.len(), "Language: {lang}");
        for (s, base) in strings.iter().zip(expect.iter()) {
            let exp = match base.unit {
                TimeUnit::Week if *lang == Language::Mn => TimeAgo {
                    n: base.n * 7,
                    unit: TimeUnit::Day,
                },
                _ => *base,
            };
            assert_eq!(
                parse_timeago(*lang, s),
                Some(exp),
                "Language: {lang}, txt: `{s}`"
            );
        }
    }
}
/// Parse every case of the TimeAgo table test file (language -> unit ->
/// text -> amount) and check the total number of cases.
#[test]
fn t_timeago_table() {
    #[derive(Debug, Clone, Deserialize)]
    struct TimeagoTable {
        entries: BTreeMap<Language, BTreeMap<TimeUnit, TimeagoTableEntry>>,
    }

    #[derive(Debug, Clone, Deserialize)]
    struct TimeagoTableEntry {
        cases: BTreeMap<String, u8>,
    }

    let json_path = path!(*TESTFILES / "dict" / "timeago_table.json");
    let json_file = File::open(json_path).unwrap();
    let timeago_table: TimeagoTable =
        serde_json::from_reader(BufReader::new(json_file)).unwrap();

    let mut n_cases = 0;
    for (lang, entries) in &timeago_table.entries {
        for (t, entry) in entries {
            for (txt, n) in &entry.cases {
                assert_eq!(
                    parse_timeago(*lang, txt),
                    Some(TimeAgo { n: *n, unit: *t }),
                    "lang: {lang}, txt: {txt}"
                );
                n_cases += 1;
            }
        }
    }
    assert_eq!(n_cases, 1065);
}
/// Relative and absolute textual dates in a few languages.
#[rstest]
#[case(Language::En, "Updated today", Some(ParsedDate::Relative(TimeAgo { n: 0, unit: TimeUnit::Day })))]
#[case(Language::En, "Updated yesterday", Some(ParsedDate::Relative(TimeAgo { n: 1, unit: TimeUnit::Day })))]
#[case(Language::En, "Updated 2 days ago", Some(ParsedDate::Relative(TimeAgo { n: 2, unit: TimeUnit::Day })))]
#[case(Language::Si, "ඊයේ යාවත්කාලීන කරන ලදී", Some(ParsedDate::Relative(TimeAgo { n: 1, unit: TimeUnit::Day })))]
#[case(
    Language::En,
    "Last updated on Jun 04, 2003",
    Some(ParsedDate::Absolute(date!(2003-6-4)))
)]
#[case(
    Language::Bn,
    "যোগ দিয়েছেন 24 সেপ, 2013",
    Some(ParsedDate::Absolute(date!(2013-9-24)))
)]
fn t_parse_date(
    #[case] lang: Language,
    #[case] textual_date: &str,
    #[case] expect: Option<ParsedDate>,
) {
    assert_eq!(parse_textual_date(lang, textual_date), expect);
}
/// Parse the playlist date samples of every language: three relative dates
/// followed by one absolute date per month.
#[test]
fn t_parse_date_samples() {
    let json_path = path!(*TESTFILES / "dict" / "playlist_samples.json");
    let json_file = File::open(json_path).unwrap();
    let date_samples: BTreeMap<Language, BTreeMap<String, String>> =
        serde_json::from_reader(BufReader::new(json_file)).unwrap();

    // Sample key -> expected number of days ago
    let relative: [(&str, u8); 3] = [("Today", 0), ("Yesterday", 1), ("Ago", 5)];
    // Sample key -> expected calendar date
    let absolute = [
        ("Jan", date!(2020 - 1 - 3)),
        ("Feb", date!(2016 - 2 - 7)),
        ("Mar", date!(2015 - 3 - 9)),
        ("Apr", date!(2017 - 4 - 2)),
        ("May", date!(2014 - 5 - 22)),
        ("Jun", date!(2014 - 6 - 28)),
        ("Jul", date!(2014 - 7 - 2)),
        ("Aug", date!(2015 - 8 - 23)),
        ("Sep", date!(2018 - 9 - 16)),
        ("Oct", date!(2014 - 10 - 31)),
        ("Nov", date!(2016 - 11 - 3)),
        ("Dec", date!(2021 - 12 - 24)),
    ];

    for (lang, samples) in &date_samples {
        for &(key, n) in &relative {
            assert_eq!(
                parse_textual_date(*lang, samples.get(key).unwrap()),
                Some(ParsedDate::Relative(TimeAgo {
                    n,
                    unit: TimeUnit::Day
                })),
                "lang: {lang}"
            );
        }
        for &(key, expected) in &absolute {
            assert_eq!(
                parse_textual_date(*lang, samples.get(key).unwrap()),
                Some(ParsedDate::Absolute(expected)),
                "lang: {lang}"
            );
        }
    }
}
/// Parse the video duration samples of every language (text -> seconds).
#[test]
fn t_parse_video_duration() {
    let json_path = path!(*TESTFILES / "dict" / "video_duration_samples.json");
    let json_file = File::open(json_path).unwrap();
    let duration_samples: BTreeMap<Language, BTreeMap<String, u32>> =
        serde_json::from_reader(BufReader::new(json_file)).unwrap();

    let cases = duration_samples
        .iter()
        .flat_map(|(lang, samples)| samples.iter().map(move |(txt, secs)| (lang, txt, secs)));
    for (lang, txt, duration) in cases {
        assert_eq!(
            parse_video_duration(*lang, txt),
            Some(*duration),
            "lang: {lang}; txt: `{txt}`"
        );
    }
}
/// Durations with a unit that has no number of its own, and a language that
/// puts the unit word in front of the number.
#[rstest]
#[case(Language::Ar, "19 دقيقة وثانيتان", 1142)]
#[case(Language::Ar, "دقيقة و13 ثانية", 73)]
#[case(Language::Sw, "dakika 1 na sekunde 13", 73)]
fn t_parse_video_duration2(
    #[case] lang: Language,
    #[case] video_duration: &str,
    #[case] expect: u32,
) {
    let parsed = parse_video_duration(lang, video_duration);
    assert_eq!(parsed, Some(expect));
}
/// Conversion of absolute and relative textual dates into `OffsetDateTime`.
#[test]
fn t_to_datetime() {
    // Absolute date: midnight UTC of the given day
    let absolute = parse_textual_date_to_dt(Language::En, "Last updated on Jan 3, 2020").unwrap();
    assert_eq!(absolute, datetime!(2020-1-3 0:00 +0));

    // Relative date: one calendar year before now
    let relative = parse_textual_date_to_dt(Language::En, "1 year ago").unwrap();
    let expected_year = OffsetDateTime::now_utc().year() - 1;
    assert_eq!(relative.year(), expected_year);
}
}