fix: improved timeago performance

This commit is contained in:
ThetaDev 2022-09-05 14:33:04 +02:00
parent cc63477406
commit 500ea77788
8 changed files with 3064 additions and 1088 deletions

View file

@ -29,6 +29,7 @@ futures = "0.3.21"
indicatif = "0.17.0"
filenamify = "0.1.0"
ress = "0.11.4"
phf = "0.11.1"
[dev-dependencies]
env_logger = "0.9.0"
@ -39,3 +40,4 @@ insta = "1.17.1"
velcro = "0.5.3"
unic-langid = "0.9.0"
intl_pluralrules = "7.0.1"
phf_codegen = "0.11.1"

View file

@ -164,7 +164,7 @@ async fn get_comment_datestrings(rp: &RustyTube, ctoken: &str) -> (Vec<String>,
// #[test_log::test(tokio::test)]
#[allow(dead_code)]
async fn download_timeago_testfiles() {
let json_path = Path::new("testfiles/date/timeago.json").to_path_buf();
let json_path = Path::new("testfiles/date/timeago_samples.json").to_path_buf();
if json_path.exists() {
return;
}
@ -312,7 +312,7 @@ struct TimeagoTable {
#[derive(Debug, Clone, Serialize, Deserialize)]
struct TimeagoTableEntry {
cases: BTreeMap<String, u32>,
cases: BTreeMap<String, u8>,
missing_plurals: HashSet<PluralCat>,
}

1682
src/dictionary.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -6,9 +6,10 @@ mod macros;
mod cache;
mod deobfuscate;
mod serializer;
mod timeago;
mod util;
mod dictionary;
pub mod client;
pub mod download;
pub mod model;
pub mod timeago;

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

98
tests/gen_dictionary.rs Normal file
View file

@ -0,0 +1,98 @@
use std::{
collections::BTreeMap,
fmt::Debug,
fs::File,
io::{BufReader, BufWriter, Write}, path::Path,
};
use fancy_regex::Regex;
use once_cell::sync::Lazy;
use rustypipe::{model::Language, timeago::TimeUnit};
use serde::Deserialize;
const DICT_PATH: &str = "testfiles/date/dictionary.json";
const TARGET_FILE: &str = "src/dictionary.rs";
type Dictionary = BTreeMap<Language, DictEntry>;
/// One language's entry in the JSON dictionary read from `DICT_PATH`.
#[derive(Debug, Deserialize)]
struct DictEntry {
/// Additional languages that share this entry's generated match arm.
/// Defaults to an empty list when the key is absent from the JSON.
#[serde(default)]
equivalent: Vec<Language>,
/// Maps a timeago token text to its time unit string, which is
/// interpreted by `parse_tu` (optional count plus optional unit letter).
timeago_tokens: BTreeMap<String, String>,
}
/// Parses a time unit string from the dictionary into a count and a unit.
///
/// The input is an optional decimal count followed by an optional
/// single-letter unit: `s` (second), `m` (minute), `h` (hour), `D` (day),
/// `W` (week), `M` (month) or `Y` (year).
///
/// Returns `(count, unit)`. The count is `1` when it is omitted and the unit
/// is `None` when it is omitted.
///
/// # Panics
///
/// Panics if the string does not match the expected pattern, if the unit
/// letter is unknown, or if a count is present but does not fit into a `u8`.
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
    static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w?)$").unwrap());

    let cap = match TU_PATTERN.captures(tu).unwrap() {
        Some(cap) => cap,
        None => panic!("invalid time unit: {}", tu),
    };

    // Only an omitted count defaults to 1. A count that is present but cannot
    // be parsed (e.g. "300" overflowing u8) is a dictionary error and must not
    // be silently turned into 1.
    let n = match cap.get(1).unwrap().as_str() {
        "" => 1,
        digits => digits
            .parse::<u8>()
            .unwrap_or_else(|_| panic!("invalid count in time unit: {}", tu)),
    };

    let unit = match cap.get(2).unwrap().as_str() {
        "s" => Some(TimeUnit::Second),
        "m" => Some(TimeUnit::Minute),
        "h" => Some(TimeUnit::Hour),
        "D" => Some(TimeUnit::Day),
        "W" => Some(TimeUnit::Week),
        "M" => Some(TimeUnit::Month),
        "Y" => Some(TimeUnit::Year),
        "" => None,
        _ => panic!("invalid time unit: {}", tu),
    };

    (n, unit)
}
/// Loads the dictionary from the JSON file at `DICT_PATH`.
///
/// Panics if the file cannot be opened or if its contents cannot be
/// deserialized into a `Dictionary`.
fn read_dict() -> Dictionary {
    let reader = BufReader::new(File::open(DICT_PATH).unwrap());
    serde_json::from_reader(reader).unwrap()
}
/// Generates the Rust source file at `TARGET_FILE` from the JSON dictionary.
///
/// For every dictionary entry a `phf_codegen` map is built that maps each
/// timeago token text to a `TaToken { .. }` literal. Each map becomes one arm
/// of the `match lang` in the generated `get_timeago_tokens` function; the
/// arm's pattern is the entry's language followed by all of its `equivalent`
/// languages, joined with `|`.
///
/// Panics if the dictionary cannot be read, a time unit string is rejected by
/// `parse_tu`, or the target file cannot be written.
// #[test]
fn t_gen() {
    let dict = read_dict();

    let header = r#"// This file is automatically generated. DO NOT EDIT.
use crate::{
model::Language,
timeago::{TaToken, TimeUnit},
};
"#;

    let mut tokens_fn = String::from(
        r#"#[rustfmt::skip]
pub(crate) fn get_timeago_tokens(lang: Language) -> phf::Map<&'static str, TaToken> {
match lang {
"#,
    );

    for (lang, entry) in &dict {
        // Build the token map for this language.
        let mut map = phf_codegen::Map::<&str>::new();
        for (txt, tu_str) in &entry.timeago_tokens {
            let (n, unit) = parse_tu(tu_str);
            let token = match unit {
                Some(unit) => format!("TaToken {{ n: {}, unit: Some(TimeUnit::{:?}) }}", n, unit),
                None => format!("TaToken {{ n: {}, unit: None }}", n),
            };
            map.entry(txt.as_str(), &token);
        }

        // The match pattern covers the language itself and all its equivalents.
        let selector = std::iter::once(lang)
            .chain(&entry.equivalent)
            .map(|l| format!("Language::{:?}", l))
            .collect::<Vec<_>>()
            .join(" | ");

        let code_map = map.build().to_string().replace('\n', "\n ");
        tokens_fn.push_str(&format!("{} => {},\n ", selector, code_map));
    }

    // Drop the trailing separator whitespace, then close the match and the function.
    let mut body = tokens_fn.trim_end().to_owned();
    body.push_str("\n }\n}\n");

    let code = format!("{}\n{}", header, body);
    std::fs::write(TARGET_FILE, code).unwrap();
}