fix: add dictionary support for short timeago strings

This commit is contained in:
ThetaDev 2023-05-31 01:41:46 +02:00
parent cc2cadc309
commit 0cd018e37a
10 changed files with 6308 additions and 1694 deletions

File diff suppressed because it is too large Load diff

View file

@ -128,7 +128,35 @@ where
buf.parse()
}
/// Parse all numbers occurring in a string and reurn them as a vec
/// Multiply together every run of ASCII digits found in `string`.
///
/// Each maximal run of consecutive ASCII digits is parsed as one `F`; every
/// other character only acts as a separator. Runs that fail to parse as `F`
/// (for example because they do not fit into the target type) are skipped.
///
/// Returns `None` if no run could be parsed, the value itself if exactly one
/// run was parsed, and otherwise the left-to-right product of all parsed values.
pub fn parse_numeric_prod<F>(string: &str) -> Option<F>
where
    F: FromStr + Copy + std::ops::Mul<Output = F>,
{
    string
        // Anything that is not an ASCII digit separates two numbers.
        .split(|c: char| !c.is_ascii_digit())
        // Adjacent separators produce empty pieces; those are not numbers.
        .filter(|run| !run.is_empty())
        // Drop runs the target type cannot represent instead of failing.
        .filter_map(|run| run.parse::<F>().ok())
        .reduce(|product, factor| product * factor)
}
/// Parse all numbers occurring in a string and return them as a vec
pub fn parse_numeric_vec<F>(string: &str) -> Vec<F>
where
F: FromStr,

View file

@ -199,7 +199,20 @@ pub fn parse_timeago(lang: Language, textual_date: &str) -> Option<TimeAgo> {
let entry = dictionary::entry(lang);
let filtered_str = filter_str(textual_date);
let qu: u8 = util::parse_numeric(textual_date).unwrap_or(1);
let qu: u8 = util::parse_numeric_prod(textual_date).unwrap_or(1);
// French and Spanish use 'a' as a short form of years.
// Since 'a' is also an ordinary word in these languages, it cannot be parsed as a token.
if matches!(
lang,
Language::Fr | Language::FrCa | Language::Es | Language::Es419 | Language::EsUs
) && textual_date.ends_with(" a")
{
return Some(TimeAgo {
n: qu,
unit: TimeUnit::Year,
});
}
TaTokenParser::new(&entry, util::lang_by_char(lang), false, &filtered_str)
.next()
@ -403,10 +416,10 @@ mod tests {
use crate::util::tests::TESTFILES;
#[rstest]
#[case(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
#[case(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
#[case::de(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
#[case::ar(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
// No-break space
#[case(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
#[case::nbsp(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
fn t_parse(
#[case] lang: Language,
#[case] textual_date: &str,
@ -581,7 +594,196 @@ mod tests {
assert_eq!(
parse_timeago(*lang, s),
Some(expect[n]),
"Language: {lang}, n: {n}"
"Language: {lang}, txt: `{s}`"
);
});
})
}
#[test]
fn t_testfile_short() {
let json_path = path!(*TESTFILES / "dict" / "timeago_samples_short.json");
let expect = [
TimeAgo {
n: 35,
unit: TimeUnit::Minute,
},
TimeAgo {
n: 50,
unit: TimeUnit::Minute,
},
TimeAgo {
n: 1,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 2,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 3,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 4,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 5,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 6,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 7,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 8,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 9,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 12,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 17,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 18,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 19,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 20,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 10,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 11,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 13,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 1,
unit: TimeUnit::Day,
},
TimeAgo {
n: 2,
unit: TimeUnit::Day,
},
TimeAgo {
n: 3,
unit: TimeUnit::Day,
},
TimeAgo {
n: 4,
unit: TimeUnit::Day,
},
TimeAgo {
n: 6,
unit: TimeUnit::Day,
},
TimeAgo {
n: 8,
unit: TimeUnit::Day,
},
TimeAgo {
n: 10,
unit: TimeUnit::Day,
},
TimeAgo {
n: 11,
unit: TimeUnit::Day,
},
TimeAgo {
n: 12,
unit: TimeUnit::Day,
},
TimeAgo {
n: 13,
unit: TimeUnit::Day,
},
TimeAgo {
n: 2,
unit: TimeUnit::Week,
},
TimeAgo {
n: 3,
unit: TimeUnit::Week,
},
TimeAgo {
n: 1,
unit: TimeUnit::Month,
},
TimeAgo {
n: 4,
unit: TimeUnit::Week,
},
TimeAgo {
n: 7,
unit: TimeUnit::Month,
},
TimeAgo {
n: 10,
unit: TimeUnit::Month,
},
TimeAgo {
n: 1,
unit: TimeUnit::Year,
},
TimeAgo {
n: 2,
unit: TimeUnit::Year,
},
TimeAgo {
n: 3,
unit: TimeUnit::Year,
},
TimeAgo {
n: 4,
unit: TimeUnit::Year,
},
TimeAgo {
n: 5,
unit: TimeUnit::Year,
},
];
let json_file = File::open(json_path).unwrap();
let strings_map: BTreeMap<Language, Vec<String>> =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
strings_map.iter().for_each(|(lang, strings)| {
assert_eq!(strings.len(), expect.len(), "Language: {lang}");
strings.iter().enumerate().for_each(|(n, s)| {
let mut exp = expect[n];
if *lang == Language::Mn && exp.unit == TimeUnit::Week {
exp.unit = TimeUnit::Day;
exp.n *= 7;
}
assert_eq!(
parse_timeago(*lang, s),
Some(exp),
"Language: {lang}, txt: `{s}`"
);
});
})