989 lines
29 KiB
Rust
989 lines
29 KiB
Rust
//! Parser for textual dates and times.
|
|
//!
|
|
//! The YouTube API mostly outputs pre-formatted dates and times
|
|
//! like "18 minutes ago" or "Jul 2, 2014" instead of standardized
|
|
//! machine-readable date and time formats.
|
|
//!
|
|
//! Additionally these formats are localized, meaning they depend
|
|
//! on the configured language.
|
|
//!
|
|
//! This module can parse these dates using an embedded dictionary which
|
|
//! contains date/time unit tokens for all supported languages.
|
|
|
|
use std::ops::Mul;
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
use time::{Date, Duration, Month, OffsetDateTime};
|
|
|
|
use crate::{
|
|
param::Language,
|
|
util::{self, dictionary, SplitTokens},
|
|
};
|
|
|
|
/// Parsed TimeAgo string, contains amount and time unit.
|
|
///
|
|
/// Example: "14 hours ago" => `TimeAgo {n: 14, unit: TimeUnit::Hour}`
|
|
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
|
pub struct TimeAgo {
|
|
/// Number of time units
|
|
pub n: u8,
|
|
/// Time unit
|
|
pub unit: TimeUnit,
|
|
}
|
|
|
|
/// Parsed date string that may be relative or absolute.
|
|
///
|
|
/// Examples:
|
|
///
|
|
/// - "Jul 2, 2014" => `ParsedDate::Absolute("2014-07-02")`
|
|
/// - "2 months ago" => `ParsedDate::Relative(TimeAgo {n: 2, unit: TimeUnit::Month})`
|
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
|
pub enum ParsedDate {
|
|
/// Absolute date
|
|
///
|
|
/// Example: "Jul 2, 2014"
|
|
Absolute(Date),
|
|
/// Relative date
|
|
///
|
|
/// Example: "2 months ago"
|
|
Relative(TimeAgo),
|
|
}
|
|
|
|
/// Parsed time unit
|
|
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
#[serde(rename_all = "lowercase")]
|
|
#[allow(missing_docs)]
|
|
pub enum TimeUnit {
|
|
Second,
|
|
Minute,
|
|
Hour,
|
|
Day,
|
|
Week,
|
|
Month,
|
|
Year,
|
|
}
|
|
|
|
/// Value of a parsed TimeAgo token, used in the dictionary
|
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
|
pub struct TaToken {
|
|
pub n: u8,
|
|
pub unit: Option<TimeUnit>,
|
|
}
|
|
|
|
pub enum DateCmp {
|
|
Y,
|
|
M,
|
|
D,
|
|
}
|
|
|
|
impl TimeUnit {
|
|
pub fn secs(self) -> u32 {
|
|
match self {
|
|
TimeUnit::Second => 1,
|
|
TimeUnit::Minute => 60,
|
|
TimeUnit::Hour => 3600,
|
|
TimeUnit::Day => 24 * 3600,
|
|
TimeUnit::Week => 7 * 24 * 3600,
|
|
TimeUnit::Month => 30 * 24 * 3600,
|
|
TimeUnit::Year => 365 * 24 * 3600,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl TimeAgo {
|
|
fn secs(self) -> u32 {
|
|
u32::from(self.n) * self.unit.secs()
|
|
}
|
|
}
|
|
|
|
impl Mul<u8> for TimeAgo {
|
|
type Output = Self;
|
|
|
|
fn mul(self, rhs: u8) -> Self::Output {
|
|
TimeAgo {
|
|
n: self.n * rhs,
|
|
unit: self.unit,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<TimeAgo> for Duration {
|
|
fn from(ta: TimeAgo) -> Self {
|
|
Duration::seconds(ta.secs().into())
|
|
}
|
|
}
|
|
|
|
impl From<TimeAgo> for OffsetDateTime {
|
|
fn from(ta: TimeAgo) -> Self {
|
|
let ts = util::now_sec();
|
|
match ta.unit {
|
|
TimeUnit::Month => ts.replace_date(util::shift_months(ts.date(), -i32::from(ta.n))),
|
|
TimeUnit::Year => ts.replace_date(util::shift_years(ts.date(), -i32::from(ta.n))),
|
|
_ => ts - Duration::from(ta),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<ParsedDate> for OffsetDateTime {
|
|
fn from(date: ParsedDate) -> Self {
|
|
match date {
|
|
ParsedDate::Absolute(date) => date.with_hms(0, 0, 0).unwrap().assume_utc(),
|
|
ParsedDate::Relative(timeago) => timeago.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Prepare the datestring for parsing: lowercase and filter out unnecessary punctuation
|
|
fn filter_datestr(string: &str) -> String {
|
|
string
|
|
.to_lowercase()
|
|
.chars()
|
|
.filter_map(|c| {
|
|
if matches!(c, '\u{200b}' | '.') || c.is_ascii_digit() {
|
|
None
|
|
} else if c == '-' {
|
|
Some(' ')
|
|
} else {
|
|
Some(c)
|
|
}
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
struct TaTokenParser<'a> {
|
|
iter: SplitTokens<'a>,
|
|
tokens: &'a phf::Map<&'static str, TaToken>,
|
|
}
|
|
|
|
impl<'a> TaTokenParser<'a> {
|
|
fn new(entry: &'a dictionary::Entry, by_char: bool, nd: bool, filtered_str: &'a str) -> Self {
|
|
let tokens = if nd {
|
|
&entry.timeago_nd_tokens
|
|
} else {
|
|
&entry.timeago_tokens
|
|
};
|
|
Self {
|
|
iter: SplitTokens::new(filtered_str, by_char),
|
|
tokens,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for TaTokenParser<'a> {
|
|
type Item = TimeAgo;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
// Quantity for parsing separate quantity + unit tokens
|
|
let mut qu = 1;
|
|
self.iter.find_map(|word| {
|
|
self.tokens.get(word).and_then(|t| match t.unit {
|
|
Some(unit) => Some(TimeAgo { n: t.n * qu, unit }),
|
|
None => {
|
|
qu = t.n;
|
|
None
|
|
}
|
|
})
|
|
})
|
|
}
|
|
}
|
|
|
|
fn parse_textual_month(entry: &dictionary::Entry, filtered_str: &str) -> Option<u8> {
|
|
filtered_str
|
|
.split_whitespace()
|
|
.find_map(|word| entry.months.get(word).copied())
|
|
}
|
|
|
|
/// Parse a TimeAgo string (e.g. "29 minutes ago") into a TimeAgo object.
|
|
///
|
|
/// Returns [`None`] if the date could not be parsed.
|
|
pub fn parse_timeago(lang: Language, textual_date: &str) -> Option<TimeAgo> {
|
|
let entry = dictionary::entry(lang);
|
|
let filtered_str = filter_datestr(textual_date);
|
|
|
|
let qu: u8 = util::parse_numeric_prod(textual_date).unwrap_or(1);
|
|
|
|
// French uses 'a' as a short form of years.
|
|
// Since 'a' is also a word in French, it cannot be parsed as a token.
|
|
if matches!(
|
|
lang,
|
|
Language::Fr | Language::FrCa | Language::Es | Language::Es419 | Language::EsUs
|
|
) && textual_date.ends_with(" a")
|
|
{
|
|
return Some(TimeAgo {
|
|
n: qu,
|
|
unit: TimeUnit::Year,
|
|
});
|
|
}
|
|
|
|
TaTokenParser::new(&entry, util::lang_by_char(lang), false, &filtered_str)
|
|
.next()
|
|
.map(|ta| ta * qu)
|
|
}
|
|
|
|
/// Parse a TimeAgo string (e.g. "29 minutes ago") into a Chrono DateTime object.
|
|
///
|
|
/// Returns [`None`] if the date could not be parsed.
|
|
pub fn parse_timeago_dt(lang: Language, textual_date: &str) -> Option<OffsetDateTime> {
|
|
parse_timeago(lang, textual_date).map(OffsetDateTime::from)
|
|
}
|
|
|
|
pub fn parse_timeago_dt_or_warn(
|
|
lang: Language,
|
|
textual_date: &str,
|
|
warnings: &mut Vec<String>,
|
|
) -> Option<OffsetDateTime> {
|
|
let res = parse_timeago_dt(lang, textual_date);
|
|
if res.is_none() {
|
|
warnings.push(format!("could not parse timeago `{textual_date}`"));
|
|
}
|
|
res
|
|
}
|
|
|
|
/// Parse a textual date (e.g. "29 minutes ago" or "Jul 2, 2014") into a ParsedDate object.
|
|
///
|
|
/// Returns [`None`] if the date could not be parsed.
|
|
pub fn parse_textual_date(lang: Language, textual_date: &str) -> Option<ParsedDate> {
|
|
let entry = dictionary::entry(lang);
|
|
let by_char = util::lang_by_char(lang);
|
|
let filtered_str = filter_datestr(textual_date);
|
|
|
|
let nums = util::parse_numeric_vec::<u16>(textual_date);
|
|
|
|
match nums.len() {
|
|
0 => match TaTokenParser::new(&entry, by_char, true, &filtered_str).next() {
|
|
Some(timeago) => Some(ParsedDate::Relative(timeago)),
|
|
None => TaTokenParser::new(&entry, by_char, false, &filtered_str)
|
|
.next()
|
|
.map(ParsedDate::Relative),
|
|
},
|
|
1 => TaTokenParser::new(&entry, by_char, false, &filtered_str)
|
|
.next()
|
|
.map(|timeago| ParsedDate::Relative(timeago * nums[0] as u8)),
|
|
2..=3 => {
|
|
if nums.len() == entry.date_order.len() {
|
|
let mut y: Option<u16> = None;
|
|
let mut m: Option<u16> = None;
|
|
let mut d: Option<u16> = None;
|
|
|
|
nums.iter()
|
|
.enumerate()
|
|
.for_each(|(i, n)| match entry.date_order[i] {
|
|
DateCmp::Y => y = Some(*n),
|
|
DateCmp::M => m = Some(*n),
|
|
DateCmp::D => d = Some(*n),
|
|
});
|
|
|
|
// Chinese/Japanese dont use textual months
|
|
if m.is_none() && !by_char {
|
|
m = parse_textual_month(&entry, &filtered_str).map(u16::from);
|
|
}
|
|
|
|
match (y, m, d) {
|
|
(Some(y), Some(m), Some(d)) => Month::try_from(m as u8)
|
|
.ok()
|
|
.and_then(|m| Date::from_calendar_date(y.into(), m, d as u8).ok())
|
|
.map(ParsedDate::Absolute),
|
|
_ => None,
|
|
}
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Parse a textual date (e.g. "29 minutes ago" or "Jul 2, 2014") into a Chrono DateTime object.
|
|
///
|
|
/// Returns None if the date could not be parsed.
|
|
pub fn parse_textual_date_to_dt(lang: Language, textual_date: &str) -> Option<OffsetDateTime> {
|
|
parse_textual_date(lang, textual_date).map(OffsetDateTime::from)
|
|
}
|
|
|
|
pub fn parse_textual_date_or_warn(
|
|
lang: Language,
|
|
textual_date: &str,
|
|
warnings: &mut Vec<String>,
|
|
) -> Option<OffsetDateTime> {
|
|
let res = parse_textual_date_to_dt(lang, textual_date);
|
|
if res.is_none() {
|
|
warnings.push(format!("could not parse textual date `{textual_date}`"));
|
|
}
|
|
res
|
|
}
|
|
|
|
/// Parse a textual video duration (e.g. "11 minutes, 20 seconds")
|
|
///
|
|
/// Returns None if the duration could not be parsed
|
|
pub fn parse_video_duration(lang: Language, video_duration: &str) -> Option<u32> {
|
|
let entry = dictionary::entry(lang);
|
|
let by_char = util::lang_by_char(lang);
|
|
|
|
let parts = split_duration_txt(video_duration, matches!(lang, Language::Si | Language::Sw));
|
|
let mut secs = 0;
|
|
|
|
for part in parts {
|
|
let mut n = if part.digits.is_empty() {
|
|
1
|
|
} else {
|
|
part.digits.parse::<u32>().ok()?
|
|
};
|
|
let mut tokens = TaTokenParser::new(&entry, by_char, false, &part.word).peekable();
|
|
tokens.peek()?;
|
|
|
|
tokens.for_each(|ta| {
|
|
secs += n * ta.secs();
|
|
n = 1;
|
|
});
|
|
}
|
|
|
|
Some(secs)
|
|
}
|
|
|
|
pub fn parse_video_duration_or_warn(
|
|
lang: Language,
|
|
video_duration: &str,
|
|
warnings: &mut Vec<String>,
|
|
) -> Option<u32> {
|
|
let res = parse_video_duration(lang, video_duration);
|
|
if res.is_none() {
|
|
warnings.push(format!("could not parse video duration `{video_duration}`"));
|
|
}
|
|
res
|
|
}
|
|
|
|
#[derive(Default)]
|
|
struct DurationTxtSegment {
|
|
digits: String,
|
|
word: String,
|
|
}
|
|
|
|
/// Split a video duration string into its segments.
|
|
///
|
|
/// Each segment consists of a word and a string of digits (one of them may be empty).
|
|
///
|
|
/// The `start_word` parameter determines whether the segments should start with a word
|
|
/// instead of a number. This is the case in Swahili and Singhalese.
|
|
///
|
|
/// Example (start_word=false):
|
|
/// - `1 minute, 13 seconds` -> `{1;minute} {13;seconds}`
|
|
/// - `foo 1 minute, 13 seconds bar` -> `{foo} {1;minute} {13;seconds bar}`
|
|
///
|
|
/// Example (start_word=true):
|
|
/// - `dakika 1 na sekunde 1` -> `{1;dakika} {1;na sekunde}`
|
|
/// - `foo dakika 1 na sekunde 1 bar` -> `{1;foo dakika} {1;na sekunde} {bar}`
|
|
fn split_duration_txt(txt: &str, start_word: bool) -> Vec<DurationTxtSegment> {
|
|
let mut segments = Vec::new();
|
|
|
|
// 1: parse digits, 2: parse word
|
|
let mut state: u8 = 0;
|
|
let mut seg = DurationTxtSegment::default();
|
|
|
|
for c in txt.trim().chars() {
|
|
if c.is_ascii_digit() {
|
|
if state == 2 && (!seg.digits.is_empty() || (!start_word && segments.is_empty())) {
|
|
segments.push(seg);
|
|
seg = DurationTxtSegment::default();
|
|
}
|
|
seg.digits.push(c);
|
|
state = 1;
|
|
} else {
|
|
if (state == 1) && (!seg.word.is_empty() || (start_word && segments.is_empty())) {
|
|
segments.push(seg);
|
|
seg = DurationTxtSegment::default();
|
|
}
|
|
if !matches!(c, '.' | ',') {
|
|
c.to_lowercase().for_each(|c| seg.word.push(c));
|
|
}
|
|
state = 2;
|
|
}
|
|
}
|
|
if !seg.word.is_empty() || !seg.digits.is_empty() {
|
|
segments.push(seg);
|
|
}
|
|
|
|
segments
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use std::{collections::BTreeMap, fs::File, io::BufReader};
|
|
|
|
use path_macro::path;
|
|
use rstest::rstest;
|
|
use time::macros::{date, datetime};
|
|
|
|
use super::*;
|
|
use crate::util::tests::TESTFILES;
|
|
|
|
#[rstest]
|
|
#[case::de(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
|
|
#[case::ar(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
|
|
// No-break space
|
|
#[case::nbsp(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
|
|
fn t_parse(
|
|
#[case] lang: Language,
|
|
#[case] textual_date: &str,
|
|
#[case] expect: Option<TimeAgo>,
|
|
) {
|
|
let time_ago = parse_timeago(lang, textual_date);
|
|
assert_eq!(time_ago, expect);
|
|
}
|
|
|
|
#[test]
|
|
fn t_testfile() {
|
|
let json_path = path!(*TESTFILES / "dict" / "timeago_samples.json");
|
|
|
|
let expect = [
|
|
TimeAgo {
|
|
n: 10,
|
|
unit: TimeUnit::Minute,
|
|
},
|
|
TimeAgo {
|
|
n: 20,
|
|
unit: TimeUnit::Minute,
|
|
},
|
|
TimeAgo {
|
|
n: 1,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 2,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 7,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 8,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 9,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 10,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 11,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 12,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 13,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 14,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 15,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 3,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 4,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 4,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 5,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 6,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 6,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 20,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 2,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 3,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 5,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 6,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 8,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 10,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 12,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 2,
|
|
unit: TimeUnit::Week,
|
|
},
|
|
TimeAgo {
|
|
n: 3,
|
|
unit: TimeUnit::Week,
|
|
},
|
|
TimeAgo {
|
|
n: 4,
|
|
unit: TimeUnit::Week,
|
|
},
|
|
TimeAgo {
|
|
n: 1,
|
|
unit: TimeUnit::Month,
|
|
},
|
|
TimeAgo {
|
|
n: 8,
|
|
unit: TimeUnit::Month,
|
|
},
|
|
TimeAgo {
|
|
n: 11,
|
|
unit: TimeUnit::Month,
|
|
},
|
|
TimeAgo {
|
|
n: 1,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
TimeAgo {
|
|
n: 2,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
TimeAgo {
|
|
n: 3,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
TimeAgo {
|
|
n: 4,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
];
|
|
|
|
let json_file = File::open(json_path).unwrap();
|
|
let strings_map: BTreeMap<Language, Vec<String>> =
|
|
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
|
|
|
for (lang, strings) in &strings_map {
|
|
assert_eq!(strings.len(), expect.len());
|
|
strings.iter().enumerate().for_each(|(n, s)| {
|
|
assert_eq!(
|
|
parse_timeago(*lang, s),
|
|
Some(expect[n]),
|
|
"Language: {lang}, txt: `{s}`"
|
|
);
|
|
});
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn t_testfile_short() {
|
|
let json_path = path!(*TESTFILES / "dict" / "timeago_samples_short.json");
|
|
|
|
let expect = [
|
|
TimeAgo {
|
|
n: 35,
|
|
unit: TimeUnit::Minute,
|
|
},
|
|
TimeAgo {
|
|
n: 50,
|
|
unit: TimeUnit::Minute,
|
|
},
|
|
TimeAgo {
|
|
n: 1,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 2,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 3,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 4,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 5,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 6,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 7,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 8,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 9,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 12,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 17,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 18,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 19,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 20,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 10,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 11,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 13,
|
|
unit: TimeUnit::Hour,
|
|
},
|
|
TimeAgo {
|
|
n: 1,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 2,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 3,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 4,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 6,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 8,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 10,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 11,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 12,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 13,
|
|
unit: TimeUnit::Day,
|
|
},
|
|
TimeAgo {
|
|
n: 2,
|
|
unit: TimeUnit::Week,
|
|
},
|
|
TimeAgo {
|
|
n: 3,
|
|
unit: TimeUnit::Week,
|
|
},
|
|
TimeAgo {
|
|
n: 1,
|
|
unit: TimeUnit::Month,
|
|
},
|
|
TimeAgo {
|
|
n: 4,
|
|
unit: TimeUnit::Week,
|
|
},
|
|
TimeAgo {
|
|
n: 7,
|
|
unit: TimeUnit::Month,
|
|
},
|
|
TimeAgo {
|
|
n: 10,
|
|
unit: TimeUnit::Month,
|
|
},
|
|
TimeAgo {
|
|
n: 1,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
TimeAgo {
|
|
n: 2,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
TimeAgo {
|
|
n: 3,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
TimeAgo {
|
|
n: 4,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
TimeAgo {
|
|
n: 5,
|
|
unit: TimeUnit::Year,
|
|
},
|
|
];
|
|
|
|
let json_file = File::open(json_path).unwrap();
|
|
let strings_map: BTreeMap<Language, Vec<String>> =
|
|
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
|
|
|
for (lang, strings) in &strings_map {
|
|
assert_eq!(strings.len(), expect.len(), "Language: {lang}");
|
|
strings.iter().enumerate().for_each(|(n, s)| {
|
|
let mut exp = expect[n];
|
|
if *lang == Language::Mn && exp.unit == TimeUnit::Week {
|
|
exp.unit = TimeUnit::Day;
|
|
exp.n *= 7;
|
|
}
|
|
|
|
assert_eq!(
|
|
parse_timeago(*lang, s),
|
|
Some(exp),
|
|
"Language: {lang}, txt: `{s}`"
|
|
);
|
|
});
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn t_timeago_table() {
|
|
#[derive(Debug, Clone, Deserialize)]
|
|
struct TimeagoTable {
|
|
entries: BTreeMap<Language, BTreeMap<TimeUnit, TimeagoTableEntry>>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Deserialize)]
|
|
struct TimeagoTableEntry {
|
|
cases: BTreeMap<String, u8>,
|
|
}
|
|
|
|
let json_path = path!(*TESTFILES / "dict" / "timeago_table.json");
|
|
let json_file = File::open(json_path).unwrap();
|
|
let timeago_table: TimeagoTable =
|
|
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
|
let mut n_cases = 0;
|
|
|
|
timeago_table.entries.iter().for_each(|(lang, entries)| {
|
|
for (t, entry) in entries {
|
|
entry.cases.iter().for_each(|(txt, n)| {
|
|
let timeago = parse_timeago(*lang, txt);
|
|
assert_eq!(
|
|
timeago,
|
|
Some(TimeAgo { n: *n, unit: *t }),
|
|
"lang: {lang}, txt: {txt}"
|
|
);
|
|
|
|
n_cases += 1;
|
|
});
|
|
}
|
|
});
|
|
|
|
assert_eq!(n_cases, 1065);
|
|
}
|
|
|
|
#[rstest]
|
|
#[case(Language::En, "Updated today", Some(ParsedDate::Relative(TimeAgo { n: 0, unit: TimeUnit::Day })))]
|
|
#[case(Language::En, "Updated yesterday", Some(ParsedDate::Relative(TimeAgo { n: 1, unit: TimeUnit::Day })))]
|
|
#[case(Language::En, "Updated 2 days ago", Some(ParsedDate::Relative(TimeAgo { n: 2, unit: TimeUnit::Day })))]
|
|
#[case(Language::Si, "ඊයේ යාවත්කාලීන කරන ලදී", Some(ParsedDate::Relative(TimeAgo { n: 1, unit: TimeUnit::Day })))]
|
|
#[case(
|
|
Language::En,
|
|
"Last updated on Jun 04, 2003",
|
|
Some(ParsedDate::Absolute(date!(2003-6-4)))
|
|
)]
|
|
#[case(
|
|
Language::Bn,
|
|
"যোগ দিয়েছেন 24 সেপ, 2013",
|
|
Some(ParsedDate::Absolute(date!(2013-9-24)))
|
|
)]
|
|
fn t_parse_date(
|
|
#[case] lang: Language,
|
|
#[case] textual_date: &str,
|
|
#[case] expect: Option<ParsedDate>,
|
|
) {
|
|
let parsed_date = parse_textual_date(lang, textual_date);
|
|
assert_eq!(parsed_date, expect);
|
|
}
|
|
|
|
#[test]
|
|
fn t_parse_date_samples() {
|
|
let json_path = path!(*TESTFILES / "dict" / "playlist_samples.json");
|
|
let json_file = File::open(json_path).unwrap();
|
|
let date_samples: BTreeMap<Language, BTreeMap<String, String>> =
|
|
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
|
|
|
for (lang, samples) in &date_samples {
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Today").unwrap()),
|
|
Some(ParsedDate::Relative(TimeAgo {
|
|
n: 0,
|
|
unit: TimeUnit::Day
|
|
})),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Yesterday").unwrap()),
|
|
Some(ParsedDate::Relative(TimeAgo {
|
|
n: 1,
|
|
unit: TimeUnit::Day
|
|
})),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Ago").unwrap()),
|
|
Some(ParsedDate::Relative(TimeAgo {
|
|
n: 5,
|
|
unit: TimeUnit::Day
|
|
})),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Jan").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2020 - 1 - 3))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Feb").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2016 - 2 - 7))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Mar").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2015 - 3 - 9))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Apr").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2017 - 4 - 2))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("May").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2014 - 5 - 22))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Jun").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2014 - 6 - 28))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Jul").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2014 - 7 - 2))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Aug").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2015 - 8 - 23))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Sep").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2018 - 9 - 16))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Oct").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2014 - 10 - 31))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Nov").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2016 - 11 - 3))),
|
|
"lang: {lang}"
|
|
);
|
|
assert_eq!(
|
|
parse_textual_date(*lang, samples.get("Dec").unwrap()),
|
|
Some(ParsedDate::Absolute(date!(2021 - 12 - 24))),
|
|
"lang: {lang}"
|
|
);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn t_parse_video_duration() {
|
|
let json_path = path!(*TESTFILES / "dict" / "video_duration_samples.json");
|
|
let json_file = File::open(json_path).unwrap();
|
|
let date_samples: BTreeMap<Language, BTreeMap<String, u32>> =
|
|
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
|
|
|
for (lang, samples) in &date_samples {
|
|
for (txt, duration) in samples {
|
|
assert_eq!(
|
|
parse_video_duration(*lang, txt),
|
|
Some(*duration),
|
|
"lang: {lang}; txt: `{txt}`"
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[rstest]
|
|
#[case(Language::Ar, "19 دقيقة وثانيتان", 1142)]
|
|
#[case(Language::Ar, "دقيقة و13 ثانية", 73)]
|
|
#[case(Language::Sw, "dakika 1 na sekunde 13", 73)]
|
|
fn t_parse_video_duration2(
|
|
#[case] lang: Language,
|
|
#[case] video_duration: &str,
|
|
#[case] expect: u32,
|
|
) {
|
|
assert_eq!(parse_video_duration(lang, video_duration), Some(expect));
|
|
}
|
|
|
|
#[test]
|
|
fn t_to_datetime() {
|
|
// Absolute date
|
|
let date = parse_textual_date_to_dt(Language::En, "Last updated on Jan 3, 2020").unwrap();
|
|
assert_eq!(date, datetime!(2020-1-3 0:00 +0));
|
|
|
|
// Relative date
|
|
let date = parse_textual_date_to_dt(Language::En, "1 year ago").unwrap();
|
|
let now = OffsetDateTime::now_utc();
|
|
assert_eq!(date.year(), now.year() - 1);
|
|
}
|
|
}
|