fix: update large number samples
This commit is contained in:
parent
e94de9a0f6
commit
72d817edd7
8 changed files with 33785 additions and 16936 deletions
|
|
@ -339,22 +339,24 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
},
|
||||
comma_decimal: false,
|
||||
number_tokens: ::phf::Map {
|
||||
key: 10121458955350035957,
|
||||
key: 12913932095322966823,
|
||||
disps: &[
|
||||
(0, 5),
|
||||
(6, 0),
|
||||
(0, 7),
|
||||
(9, 8),
|
||||
(0, 0),
|
||||
],
|
||||
entries: &[
|
||||
("ল\u{9be}", 5),
|
||||
("কোঃট\u{9be}", 9),
|
||||
("নিঃট\u{9be}", 6),
|
||||
("হ\u{9be}জ\u{9be}ৰ", 3),
|
||||
("ল\u{9be}খট\u{9be}", 5),
|
||||
("নিয\u{9c1}ত", 6),
|
||||
("হ\u{9be}", 3),
|
||||
("ল\u{9be}খ", 5),
|
||||
("হ\u{9be}জ\u{9be}ৰট\u{9be}", 3),
|
||||
("নিয\u{9c1}তট\u{9be}", 6),
|
||||
("হ\u{9be}জ\u{9be}ৰ", 3),
|
||||
("ল\u{9be}", 5),
|
||||
("ল\u{9be}খট\u{9be}", 5),
|
||||
("কোঃট\u{9be}", 9),
|
||||
("নিঃট\u{9be}", 6),
|
||||
("নিয\u{9c1}ত", 6),
|
||||
("নিঃ", 6),
|
||||
("ল\u{9be}খ", 5),
|
||||
],
|
||||
},
|
||||
album_types: ::phf::Map {
|
||||
|
|
@ -851,14 +853,14 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
},
|
||||
comma_decimal: true,
|
||||
number_tokens: ::phf::Map {
|
||||
key: 12913932095322966823,
|
||||
key: 7485420634051515786,
|
||||
disps: &[
|
||||
(2, 0),
|
||||
],
|
||||
entries: &[
|
||||
("mM", 9),
|
||||
("M", 6),
|
||||
("m", 3),
|
||||
("kM", 9),
|
||||
("M", 6),
|
||||
],
|
||||
},
|
||||
album_types: ::phf::Map {
|
||||
|
|
@ -3181,18 +3183,14 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
},
|
||||
comma_decimal: false,
|
||||
number_tokens: ::phf::Map {
|
||||
key: 2980949210194914378,
|
||||
key: 12913932095322966823,
|
||||
disps: &[
|
||||
(1, 3),
|
||||
(5, 0),
|
||||
(1, 0),
|
||||
],
|
||||
entries: &[
|
||||
("억명", 8),
|
||||
("천명", 3),
|
||||
("만회", 4),
|
||||
("천회", 3),
|
||||
("억회", 8),
|
||||
("만명", 4),
|
||||
("천", 3),
|
||||
("만", 4),
|
||||
("억", 8),
|
||||
],
|
||||
},
|
||||
album_types: ::phf::Map {
|
||||
|
|
@ -3964,18 +3962,18 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
},
|
||||
comma_decimal: false,
|
||||
number_tokens: ::phf::Map {
|
||||
key: 10121458955350035957,
|
||||
key: 12913932095322966823,
|
||||
disps: &[
|
||||
(5, 1),
|
||||
(2, 0),
|
||||
(3, 0),
|
||||
(0, 2),
|
||||
],
|
||||
entries: &[
|
||||
("ထောင\u{103a}", 3),
|
||||
("သန\u{103a}း", 6),
|
||||
("က\u{102f}ဋေထ", 10),
|
||||
("ထ", 3),
|
||||
("က\u{102f}ဋေ", 7),
|
||||
("သောင\u{103a}း", 4),
|
||||
("သန\u{103a}း", 6),
|
||||
("ထောင\u{103a}", 3),
|
||||
("သ\u{102d}န\u{103a}း", 5),
|
||||
("သောင\u{103a}း", 4),
|
||||
],
|
||||
},
|
||||
album_types: ::phf::Map {
|
||||
|
|
|
|||
|
|
@ -335,7 +335,7 @@ where
|
|||
_ => dict_entry.number_tokens.get(token).map(|t| *t as i32),
|
||||
};
|
||||
|
||||
if dict_entry.by_char {
|
||||
if dict_entry.by_char || lang == Language::Ko {
|
||||
exp += filtered
|
||||
.chars()
|
||||
.filter_map(|token| lookup_token(&token.to_string()))
|
||||
|
|
@ -511,7 +511,7 @@ pub(crate) mod tests {
|
|||
fn t_parse_large_numstr_samples() {
|
||||
let json_path = path!(*TESTFILES / "dict" / "large_number_samples.json");
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
let number_samples: BTreeMap<Language, BTreeMap<u8, (String, u64)>> =
|
||||
let number_samples: BTreeMap<Language, BTreeMap<String, (String, u64)>> =
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
||||
|
||||
number_samples.iter().for_each(|(lang, entry)| {
|
||||
|
|
@ -540,12 +540,17 @@ pub(crate) mod tests {
|
|||
// in the string.
|
||||
let rounded = {
|
||||
let n_significant_d = string.chars().filter(char::is_ascii_digit).count();
|
||||
let mag = (expect as f64).log10().floor();
|
||||
let factor = 10_u64.pow(1 + mag as u32 - n_significant_d as u32);
|
||||
(((expect as f64) / factor as f64).floor() as u64) * factor
|
||||
if n_significant_d == 0 {
|
||||
expect
|
||||
} else {
|
||||
let mag = (expect as f64).log10().floor();
|
||||
let factor = 10_u64.pow(1 + mag as u32 - n_significant_d as u32);
|
||||
(((expect as f64) / factor as f64).floor() as u64) * factor
|
||||
}
|
||||
};
|
||||
|
||||
let res = parse_large_numstr::<u64>(string, lang).expect(string);
|
||||
// TODO: add support for zero values
|
||||
let res = parse_large_numstr::<u64>(string, lang).unwrap_or_default();
|
||||
assert_eq!(res, rounded, "{string} (lang: {lang}, exact: {expect})");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Reference in a new issue