fix: improve number parsing, add number_nd_tokens
add dictionary overrides
This commit is contained in:
parent
97492780c6
commit
19781eab36
13 changed files with 33097 additions and 35712 deletions
|
|
@ -72,7 +72,7 @@ pub fn write_samples_to_dict(project_root: &Path) {
|
|||
});
|
||||
}
|
||||
|
||||
util::write_dict(project_root, &dict);
|
||||
util::write_dict(project_root, dict);
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
fs::File,
|
||||
io::BufReader,
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use futures::{stream, StreamExt};
|
||||
|
|
@ -9,26 +13,12 @@ use path_macro::path;
|
|||
use regex::Regex;
|
||||
use rustypipe::client::{ClientType, RustyPipe, RustyPipeQuery};
|
||||
use rustypipe::param::{locale::LANGUAGES, Language};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::Deserialize;
|
||||
use serde_with::{serde_as, DefaultOnError, VecSkipError};
|
||||
|
||||
use crate::util::{self, QBrowse, QCont, Text};
|
||||
use crate::util::{self, QBrowse, QCont, Text, TextRuns};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[serde(untagged)]
|
||||
enum NumKey {
|
||||
Mag(u8),
|
||||
S(NumKeyS),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
enum NumKeyS {
|
||||
Zero,
|
||||
One,
|
||||
}
|
||||
|
||||
type CollectedNumbers = BTreeMap<Language, BTreeMap<NumKey, (String, u64)>>;
|
||||
type CollectedNumbers = BTreeMap<Language, BTreeMap<String, u64>>;
|
||||
|
||||
/// Collect video view count texts in every supported language
|
||||
/// and write them to `testfiles/dict/large_number_samples.json`.
|
||||
|
|
@ -45,9 +35,7 @@ type CollectedNumbers = BTreeMap<Language, BTreeMap<NumKey, (String, u64)>>;
|
|||
/// outputs view counts both in approximated and exact format, so we can use
|
||||
/// the exact counts to figure out the tokens.
|
||||
pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
|
||||
let json_path = path!(project_root / "testfiles" / "dict" / "large_number_samples.json");
|
||||
let json_path_all =
|
||||
path!(project_root / "testfiles" / "dict" / "large_number_samples_all.json");
|
||||
let json_path = path!(project_root / "testfiles" / "dict" / "large_number_samples_all.json");
|
||||
let rp = RustyPipe::new();
|
||||
|
||||
let channels = [
|
||||
|
|
@ -64,6 +52,16 @@ pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
|
|||
"UCQXYK94vDqOEkPbTCyL0OjA", // (1)
|
||||
];
|
||||
|
||||
// YTM outputs the subscriber count in a shortened format in some languages
|
||||
let music_channels = [
|
||||
"UC_1N84buVNgR_-3gDZ9Jtxg", // 10e8 (158M)
|
||||
"UCRw0x9_EfawqmgDI2IgQLLg", // 10e7 (29M)
|
||||
"UChWu2clmvJ5wN_0Ic5dnqmw", // 10e6 (1.9M)
|
||||
"UCOYiPDuimprrGHgFy4_Fw8Q", // 10e5 (149K)
|
||||
"UC8nZf9WyVIxNMly_hy2PTyQ", // 10e4 (17K)
|
||||
"UCaltNL5XvZ7dKvBsBPi-gqg", // 10e3 (8K)
|
||||
];
|
||||
|
||||
// Build a lookup table for the channel's subscriber counts
|
||||
let subscriber_counts: Arc<BTreeMap<String, u64>> = stream::iter(channels)
|
||||
.map(|c| {
|
||||
|
|
@ -80,10 +78,26 @@ pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
|
|||
.await
|
||||
.into();
|
||||
|
||||
let collected_numbers_all: BTreeMap<Language, BTreeMap<String, u64>> = stream::iter(LANGUAGES)
|
||||
let music_subscriber_counts: Arc<BTreeMap<String, u64>> = stream::iter(music_channels)
|
||||
.map(|c| {
|
||||
let rp = rp.query();
|
||||
async move {
|
||||
let subscriber_count = music_channel_subscribers(&rp, c).await.unwrap();
|
||||
|
||||
let n = util::parse_largenum_en(&subscriber_count).unwrap();
|
||||
(c.to_owned(), n)
|
||||
}
|
||||
})
|
||||
.buffer_unordered(concurrency)
|
||||
.collect::<BTreeMap<_, _>>()
|
||||
.await
|
||||
.into();
|
||||
|
||||
let collected_numbers: CollectedNumbers = stream::iter(LANGUAGES)
|
||||
.map(|lang| {
|
||||
let rp = rp.query().lang(lang);
|
||||
let subscriber_counts = subscriber_counts.clone();
|
||||
let music_subscriber_counts = music_subscriber_counts.clone();
|
||||
async move {
|
||||
let mut entry = BTreeMap::new();
|
||||
|
||||
|
|
@ -101,6 +115,15 @@ pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
|
|||
println!("collected {lang}-{n}");
|
||||
}
|
||||
|
||||
for (n, ch_id) in music_channels.iter().enumerate() {
|
||||
let subscriber_count = music_channel_subscribers(&rp, ch_id)
|
||||
.await
|
||||
.context(format!("{lang}-music-{n}"))
|
||||
.unwrap();
|
||||
entry.insert(subscriber_count, music_subscriber_counts[*ch_id]);
|
||||
println!("collected {lang}-music-{n}");
|
||||
}
|
||||
|
||||
(lang, entry)
|
||||
}
|
||||
})
|
||||
|
|
@ -108,61 +131,13 @@ pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
|
|||
.collect()
|
||||
.await;
|
||||
|
||||
let collected_numbers: CollectedNumbers = collected_numbers_all
|
||||
.iter()
|
||||
.map(|(lang, entry)| {
|
||||
let mut e2 = BTreeMap::new();
|
||||
entry.iter().for_each(|(txt, num)| {
|
||||
let key = if num == &0 {
|
||||
NumKey::S(NumKeyS::Zero)
|
||||
} else if num == &1 {
|
||||
NumKey::S(NumKeyS::One)
|
||||
} else {
|
||||
NumKey::Mag(get_mag(*num))
|
||||
};
|
||||
|
||||
e2.insert(key, (txt.to_owned(), *num));
|
||||
});
|
||||
(*lang, e2)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let file = File::create(json_path).unwrap();
|
||||
serde_json::to_writer_pretty(file, &collected_numbers).unwrap();
|
||||
|
||||
let file = File::create(json_path_all).unwrap();
|
||||
serde_json::to_writer_pretty(file, &collected_numbers_all).unwrap();
|
||||
}
|
||||
|
||||
/// Attempt to parse the numbers collected by `collect-large-numbers`
|
||||
/// and write the results to `dictionary.json`.
|
||||
pub fn write_samples_to_dict(project_root: &Path) {
|
||||
/*
|
||||
Manual corrections:
|
||||
as
|
||||
"কোঃটা": 9,
|
||||
"নিঃটা": 6,
|
||||
"নিযুতটা": 6,
|
||||
"লাখটা": 5,
|
||||
"হাজাৰটা": 3
|
||||
|
||||
ar
|
||||
"ألف": 3,
|
||||
"آلاف": 3,
|
||||
"مليار": 9,
|
||||
"مليون": 6
|
||||
|
||||
bn
|
||||
"লাটি": 5,
|
||||
"শত": 2,
|
||||
"হাটি": 3,
|
||||
"কোটি": 7
|
||||
|
||||
es/es-US
|
||||
"mil": 3,
|
||||
"M": 6
|
||||
*/
|
||||
|
||||
let json_path = path!(project_root / "testfiles" / "dict" / "large_number_samples.json");
|
||||
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
|
|
@ -179,27 +154,21 @@ pub fn write_samples_to_dict(project_root: &Path) {
|
|||
let mut e_langs = dict_entry.equivalent.clone();
|
||||
e_langs.push(lang);
|
||||
|
||||
let comma_decimal = collected_nums
|
||||
.get(&lang)
|
||||
.unwrap()
|
||||
let comma_decimal = collected_nums[&lang]
|
||||
.iter()
|
||||
.find_map(|(key, (txt, _))| {
|
||||
match key {
|
||||
NumKey::Mag(mag) => {
|
||||
let point = POINT_REGEX
|
||||
.captures(txt)
|
||||
.map(|c| c.get(1).unwrap().as_str());
|
||||
.find_map(|(txt, val)| {
|
||||
let point = POINT_REGEX
|
||||
.captures(txt)
|
||||
.map(|c| c.get(1).unwrap().as_str());
|
||||
|
||||
if let Some(point) = point {
|
||||
let num_all = util::parse_numeric::<u64>(txt).unwrap();
|
||||
// If the number parsed from all digits has the same order of
|
||||
// magnitude as the actual number, it must be a separator.
|
||||
// Otherwise it is a decimal point
|
||||
return Some((get_mag(num_all) == *mag) ^ (point == ","));
|
||||
}
|
||||
}
|
||||
NumKey::S(_) => {}
|
||||
if let Some(point) = point {
|
||||
let num_all = util::parse_numeric::<u64>(txt).unwrap();
|
||||
// If the number parsed from all digits has the same order of
|
||||
// magnitude as the actual number, it must be a separator.
|
||||
// Otherwise it is a decimal point
|
||||
return Some((get_mag(num_all) == get_mag(*val)) ^ (point == ","));
|
||||
}
|
||||
|
||||
None
|
||||
})
|
||||
.unwrap();
|
||||
|
|
@ -217,6 +186,7 @@ pub fn write_samples_to_dict(project_root: &Path) {
|
|||
// If the token is found again with a different derived order of magnitude,
|
||||
// its value in the map is set to None.
|
||||
let mut found_tokens: HashMap<String, Option<u8>> = HashMap::new();
|
||||
let mut found_nd_tokens: HashMap<String, Option<u8>> = HashMap::new();
|
||||
|
||||
let mut insert_token = |token: String, mag: u8| {
|
||||
let found_token = found_tokens.entry(token).or_insert(match mag {
|
||||
|
|
@ -231,22 +201,30 @@ pub fn write_samples_to_dict(project_root: &Path) {
|
|||
}
|
||||
};
|
||||
|
||||
// Record a candidate "no digit" token together with the value `n` it stands for.
//
// The first sighting of a token stores `n` as-is. Callers pass `None` for
// tokens taken from texts that do contain digits, which marks the token as
// unusable. If a token that currently holds `Some(value)` is seen again with a
// different `n` (including `None`), it is reset to `None`; once `None`, the
// `if let` below no longer matches, so the token stays disqualified.
let mut insert_nd_token = |token: String, n: Option<u8>| {
|
||||
let found_token = found_nd_tokens.entry(token).or_insert(n);
|
||||

||||
if let Some(f) = found_token {
|
||||
if Some(*f) != n {
|
||||
*found_token = None;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for lang in e_langs {
|
||||
let entry = collected_nums.get(&lang).unwrap();
|
||||
|
||||
entry.iter().for_each(|(key, (txt, _))| {
|
||||
match key {
|
||||
NumKey::Mag(mag) => {
|
||||
let filtered = util::filter_largenumstr(txt);
|
||||
entry.iter().for_each(|(txt, val)| {
|
||||
let filtered = util::filter_largenumstr(txt);
|
||||
let mag = get_mag(*val);
|
||||
|
||||
let tokens: Vec<String> = match dict_entry.by_char {
|
||||
true => filtered.chars().map(|c| c.to_string()).collect(),
|
||||
false => filtered.split_whitespace().map(|c| c.to_string()).collect(),
|
||||
};
|
||||
let tokens: Vec<String> = match dict_entry.by_char || lang == Language::Ko {
|
||||
true => filtered.chars().map(|c| c.to_string()).collect(),
|
||||
false => filtered.split_whitespace().map(|c| c.to_string()).collect(),
|
||||
};
|
||||
|
||||
let num_before_point =
|
||||
util::parse_numeric::<u64>(txt.split(decimal_point).next().unwrap())
|
||||
.unwrap();
|
||||
match util::parse_numeric::<u64>(txt.split(decimal_point).next().unwrap()) {
|
||||
Ok(num_before_point) => {
|
||||
let mag_before_point = get_mag(num_before_point);
|
||||
let mut mag_remaining = mag - mag_before_point;
|
||||
|
||||
|
|
@ -272,9 +250,22 @@ pub fn write_samples_to_dict(project_root: &Path) {
|
|||
} else {
|
||||
insert_token(t.to_owned(), mag_remaining);
|
||||
}
|
||||
insert_nd_token(t.to_owned(), None);
|
||||
});
|
||||
}
|
||||
NumKey::S(_) => {}
|
||||
Err(e) => {
|
||||
if matches!(e.kind(), std::num::IntErrorKind::Empty) {
|
||||
// Text does not contain any digits, search for nd_tokens
|
||||
tokens.iter().for_each(|t| {
|
||||
insert_nd_token(
|
||||
t.to_owned(),
|
||||
Some((*val).try_into().expect("nd_token value too large")),
|
||||
);
|
||||
});
|
||||
} else {
|
||||
panic!("{e}, txt: {txt}")
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
@ -284,6 +275,10 @@ pub fn write_samples_to_dict(project_root: &Path) {
|
|||
.into_iter()
|
||||
.filter_map(|(k, v)| v.map(|v| (k, v)))
|
||||
.collect();
|
||||
dict_entry.number_nd_tokens = found_nd_tokens
|
||||
.into_iter()
|
||||
.filter_map(|(k, v)| v.map(|v| (k, v)))
|
||||
.collect();
|
||||
dict_entry.comma_decimal = comma_decimal;
|
||||
|
||||
// Check for duplicates
|
||||
|
|
@ -291,9 +286,13 @@ pub fn write_samples_to_dict(project_root: &Path) {
|
|||
if !dict_entry.number_tokens.values().all(|x| uniq.insert(x)) {
|
||||
println!("Warning: collected duplicate tokens for {lang}");
|
||||
}
|
||||
let mut uniq = HashSet::new();
|
||||
if !dict_entry.number_nd_tokens.values().all(|x| uniq.insert(x)) {
|
||||
println!("Warning: collected duplicate nd_tokens for {lang}");
|
||||
}
|
||||
}
|
||||
|
||||
util::write_dict(project_root, &dict);
|
||||
util::write_dict(project_root, dict);
|
||||
}
|
||||
|
||||
fn get_mag(n: u64) -> u8 {
|
||||
|
|
@ -304,59 +303,59 @@ fn get_mag(n: u64) -> u8 {
|
|||
YouTube channel videos response
|
||||
*/
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct Channel {
|
||||
contents: Contents,
|
||||
header: ChannelHeader,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct ChannelHeader {
|
||||
c4_tabbed_header_renderer: HeaderRenderer,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct HeaderRenderer {
|
||||
subscriber_count_text: Text,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct Contents {
|
||||
two_column_browse_results_renderer: TabsRenderer,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct TabsRenderer {
|
||||
#[serde_as(as = "VecSkipError<_>")]
|
||||
tabs: Vec<TabRendererWrap>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct TabRendererWrap {
|
||||
tab_renderer: TabRenderer,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct TabRenderer {
|
||||
content: RichGridRendererWrap,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct RichGridRendererWrap {
|
||||
rich_grid_renderer: RichGridRenderer,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct RichGridRenderer {
|
||||
#[serde_as(as = "VecSkipError<_>")]
|
||||
|
|
@ -366,25 +365,25 @@ struct RichGridRenderer {
|
|||
header: Option<RichGridHeader>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct RichItemRendererWrap {
|
||||
rich_item_renderer: RichItemRenderer,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct RichItemRenderer {
|
||||
content: VideoRendererWrap,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct VideoRendererWrap {
|
||||
video_renderer: VideoRenderer,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct VideoRenderer {
|
||||
/// `24,194 views`
|
||||
|
|
@ -393,65 +392,100 @@ struct VideoRenderer {
|
|||
short_view_count_text: Text,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct RichGridHeader {
|
||||
feed_filter_chip_bar_renderer: ChipBar,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct ChipBar {
|
||||
contents: Vec<Chip>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct Chip {
|
||||
chip_cloud_chip_renderer: ChipRenderer,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct ChipRenderer {
|
||||
navigation_endpoint: NavigationEndpoint,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct NavigationEndpoint {
|
||||
continuation_command: ContinuationCommand,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct ContinuationCommand {
|
||||
token: String,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct ContinuationResponse {
|
||||
// #[serde_as(as = "VecSkipError<_>")]
|
||||
on_response_received_actions: Vec<ContinuationAction>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct ContinuationAction {
|
||||
reload_continuation_items_command: ContinuationItemsWrap,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct ContinuationItemsWrap {
|
||||
#[serde_as(as = "VecSkipError<_>")]
|
||||
continuation_items: Vec<RichItemRendererWrap>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
/*
|
||||
YouTube Music channel data
|
||||
*/
|
||||
|
||||
/// Subset of the YouTube Music channel `browse` response that
/// `music_channel_subscribers` deserializes; only the header is read.
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct MusicChannel {
|
||||
header: MusicHeader,
|
||||
}
|
||||
|
||||
/// Header wrapper of a YouTube Music channel response.
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct MusicHeader {
|
||||
// Filled from either the `musicImmersiveHeaderRenderer` key (camelCase of
// the field name) or the `musicVisualHeaderRenderer` key (serde alias).
#[serde(alias = "musicVisualHeaderRenderer")]
|
||||
music_immersive_header_renderer: MusicHeaderRenderer,
|
||||
}
|
||||
|
||||
/// Header renderer of a YouTube Music channel; only the subscription button
/// is deserialized.
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct MusicHeaderRenderer {
|
||||
subscription_button: SubscriptionButton,
|
||||
}
|
||||
|
||||
/// Wrapper object around the `subscribeButtonRenderer` of a music channel header.
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct SubscriptionButton {
|
||||
subscribe_button_renderer: SubscriptionButtonRenderer,
|
||||
}
|
||||
|
||||
/// Subscribe button data of a music channel header.
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct SubscriptionButtonRenderer {
|
||||
// Subscriber count as text runs; `music_channel_subscribers` returns the
// text of the first run.
subscriber_count_text: TextRuns,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ChannelData {
|
||||
view_counts: BTreeMap<u64, String>,
|
||||
subscriber_count: String,
|
||||
|
|
@ -460,7 +494,7 @@ struct ChannelData {
|
|||
async fn get_channel(query: &RustyPipeQuery, channel_id: &str) -> Result<ChannelData> {
|
||||
let resp = query
|
||||
.raw(
|
||||
ClientType::DesktopMusic,
|
||||
ClientType::Desktop,
|
||||
"browse",
|
||||
&QBrowse {
|
||||
context: query.get_context(ClientType::Desktop, true, None).await,
|
||||
|
|
@ -540,18 +574,31 @@ async fn get_channel(query: &RustyPipeQuery, channel_id: &str) -> Result<Channel
|
|||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use rustypipe::client::RustyPipe;
|
||||
async fn music_channel_subscribers(query: &RustyPipeQuery, channel_id: &str) -> Result<String> {
|
||||
let resp = query
|
||||
.raw(
|
||||
ClientType::DesktopMusic,
|
||||
"browse",
|
||||
&QBrowse {
|
||||
context: query
|
||||
.get_context(ClientType::DesktopMusic, true, None)
|
||||
.await,
|
||||
browse_id: channel_id,
|
||||
params: None,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn t() {
|
||||
let rp = RustyPipe::new();
|
||||
let x = get_channel(&rp.query(), "UCQXYK94vDqOEkPbTCyL0OjA")
|
||||
.await
|
||||
.unwrap();
|
||||
dbg!(&x);
|
||||
}
|
||||
let channel = serde_json::from_str::<MusicChannel>(&resp)?;
|
||||
channel
|
||||
.header
|
||||
.music_immersive_header_renderer
|
||||
.subscription_button
|
||||
.subscribe_button_renderer
|
||||
.subscriber_count_text
|
||||
.runs
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|t| t.text)
|
||||
.ok_or_else(|| anyhow::anyhow!("no text"))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -291,5 +291,5 @@ pub fn write_samples_to_dict(project_root: &Path) {
|
|||
dict_entry.date_order = num_order;
|
||||
}
|
||||
|
||||
util::write_dict(project_root, &dict);
|
||||
util::write_dict(project_root, dict);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,6 +73,10 @@ pub(crate) struct Entry {
|
|||
///
|
||||
/// Format: Parsed token -> decimal power
|
||||
pub number_tokens: phf::Map<&'static str, u8>,
|
||||
/// Tokens for parsing number strings with no digits (e.g. "No videos")
|
||||
///
|
||||
/// Format: Parsed token -> value
|
||||
pub number_nd_tokens: phf::Map<&'static str, u8>,
|
||||
/// Names of album types (Album, Single, ...)
|
||||
///
|
||||
/// Format: Parsed text -> Album type
|
||||
|
|
@ -138,6 +142,12 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
number_tokens.entry(txt, &mag.to_string());
|
||||
});
|
||||
|
||||
// Number nd tokens
|
||||
let mut number_nd_tokens = phf_codegen::Map::<&str>::new();
|
||||
entry.number_nd_tokens.iter().for_each(|(txt, mag)| {
|
||||
number_nd_tokens.entry(txt, &mag.to_string());
|
||||
});
|
||||
|
||||
// Album types
|
||||
let mut album_types = phf_codegen::Map::<&str>::new();
|
||||
entry.album_types.iter().for_each(|(txt, album_type)| {
|
||||
|
|
@ -148,10 +158,11 @@ pub(crate) fn entry(lang: Language) -> Entry {
|
|||
let code_ta_nd_tokens = &ta_nd_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_months = &months.build().to_string().replace('\n', "\n ");
|
||||
let code_number_tokens = &number_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_number_nd_tokens = &number_nd_tokens.build().to_string().replace('\n', "\n ");
|
||||
let code_album_types = &album_types.build().to_string().replace('\n', "\n ");
|
||||
|
||||
let _ = write!(code_timeago_tokens, "{} => Entry {{\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n album_types: {},\n }},\n ",
|
||||
selector, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_album_types);
|
||||
write!(code_timeago_tokens, "{} => Entry {{\n timeago_tokens: {},\n date_order: {},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n number_nd_tokens: {},\n album_types: {},\n }},\n ",
|
||||
selector, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_number_nd_tokens, code_album_types).unwrap();
|
||||
});
|
||||
|
||||
code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n";
|
||||
|
|
|
|||
|
|
@ -12,8 +12,11 @@ use rustypipe::{client::YTContext, model::AlbumType, param::Language};
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
static DICT_PATH: Lazy<PathBuf> = Lazy::new(|| path!("testfiles" / "dict" / "dictionary.json"));
|
||||
static DICT_OVERRIDE_PATH: Lazy<PathBuf> =
|
||||
Lazy::new(|| path!("testfiles" / "dict" / "dictionary_override.json"));
|
||||
|
||||
type Dictionary = BTreeMap<Language, DictEntry>;
|
||||
type DictionaryOverride = BTreeMap<Language, DictOverrideEntry>;
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
|
|
@ -62,6 +65,13 @@ pub struct DictEntry {
|
|||
pub album_types: BTreeMap<String, AlbumType>,
|
||||
}
|
||||
|
||||
/// Manual per-language corrections that `write_dict` applies on top of a
/// generated `DictEntry` before the dictionary is written.
///
/// Both maps are keyed by token. A `Some(value)` entry inserts or replaces the
/// token in the corresponding `DictEntry` map, a `None` entry removes it.
/// Fields missing from the override JSON deserialize to empty maps
/// (`#[serde(default)]`).
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct DictOverrideEntry {
|
||||
/// Overrides for `DictEntry::number_tokens`.
pub number_tokens: BTreeMap<String, Option<u8>>,
|
||||
/// Overrides for `DictEntry::number_nd_tokens`.
pub number_nd_tokens: BTreeMap<String, Option<u8>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct QBrowse<'a> {
|
||||
|
|
@ -95,10 +105,41 @@ pub fn read_dict(project_root: &Path) -> Dictionary {
|
|||
serde_json::from_reader(BufReader::new(json_file)).unwrap()
|
||||
}
|
||||
|
||||
pub fn write_dict(project_root: &Path, dict: &Dictionary) {
|
||||
/// Load the dictionary overrides from
/// `testfiles/dict/dictionary_override.json` below `project_root`.
///
/// # Panics
///
/// Panics if the file cannot be opened or if its contents cannot be
/// deserialized into a `DictionaryOverride`.
pub fn read_dict_override(project_root: &Path) -> DictionaryOverride {
|
||||
let json_path = path!(project_root / *DICT_OVERRIDE_PATH);
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap()
|
||||
}
|
||||
|
||||
pub fn write_dict(project_root: &Path, dict: Dictionary) {
|
||||
let dict_override = read_dict_override(project_root);
|
||||
|
||||
let json_path = path!(project_root / *DICT_PATH);
|
||||
let json_file = File::create(json_path).unwrap();
|
||||
serde_json::to_writer_pretty(json_file, dict).unwrap();
|
||||
|
||||
/// Apply the overrides in `or` to `map`.
///
/// For every key in `or`: a `Some(val)` override inserts a clone of `val`
/// under that key (replacing any existing value), a `None` override removes
/// the key from `map`. Keys of `map` that do not occur in `or` are left
/// untouched.
fn apply_map<K: Clone + Ord, V: Clone>(map: &mut BTreeMap<K, V>, or: &BTreeMap<K, Option<V>>) {
|
||||
or.iter().for_each(|(key, val)| match val {
|
||||
Some(val) => {
|
||||
map.insert(key.clone(), val.clone());
|
||||
}
|
||||
None => {
|
||||
// Removing a key that is not present is a no-op.
map.remove(key);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let dict: Dictionary = dict
|
||||
.into_iter()
|
||||
.map(|(lang, mut entry)| {
|
||||
if let Some(or) = dict_override.get(&lang) {
|
||||
apply_map(&mut entry.number_tokens, &or.number_tokens);
|
||||
apply_map(&mut entry.number_nd_tokens, &or.number_nd_tokens);
|
||||
}
|
||||
(lang, entry)
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_json::to_writer_pretty(json_file, &dict).unwrap();
|
||||
}
|
||||
|
||||
pub fn filter_datestr(string: &str) -> String {
|
||||
|
|
@ -133,6 +174,7 @@ pub fn filter_largenumstr(string: &str) -> String {
|
|||
| ','
|
||||
) && !c.is_ascii_digit()
|
||||
})
|
||||
.flat_map(char::to_lowercase)
|
||||
.collect()
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,3 +30,5 @@ build a dictionary.
|
|||
- Examples: "1.4M views"
|
||||
- There is an exception for the value 0 ("no views") and in some languages for the value
|
||||
1 (pt: "Um vídeo")
|
||||
- Special case: Language "gu", "જોવાયાની સંખ્યા" = "no views", contains no unique tokens
|
||||
to parse
|
||||
|
|
|
|||
|
|
@ -515,7 +515,7 @@ impl<T> YouTubeListMapper<T> {
|
|||
publish_date_txt: pub_date_txt,
|
||||
view_count: video
|
||||
.view_count_text
|
||||
.map(|txt| util::parse_large_numstr(&txt, lang).unwrap_or_default()),
|
||||
.and_then(|txt| util::parse_large_numstr_or_warn(&txt, lang, &mut self.warnings)),
|
||||
is_live: false,
|
||||
is_short: true,
|
||||
is_upcoming: false,
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
160
src/util/mod.rs
160
src/util/mod.rs
|
|
@ -290,87 +290,83 @@ pub fn parse_large_numstr<F>(string: &str, lang: Language) -> Option<F>
|
|||
where
|
||||
F: TryFrom<u64>,
|
||||
{
|
||||
// Special case for Gujarati: the "no views" text does not contain
|
||||
// any parseable tokens: the 2 words occur in any view count text.
|
||||
// This may be a translation error.
|
||||
if lang == Language::Gu && string == "જોવાયાની સંખ્યા" {
|
||||
return 0.try_into().ok();
|
||||
}
|
||||
|
||||
let dict_entry = dictionary::entry(lang);
|
||||
let by_char = lang_by_char(lang) || lang == Language::Ko;
|
||||
let decimal_point = match dict_entry.comma_decimal {
|
||||
true => ',',
|
||||
false => '.',
|
||||
};
|
||||
|
||||
let (num, mut exp, filtered) = {
|
||||
let mut buf = String::new();
|
||||
let mut filtered = String::new();
|
||||
let mut exp = 0;
|
||||
let mut after_point = false;
|
||||
for c in string.chars() {
|
||||
if c.is_ascii_digit() {
|
||||
buf.push(c);
|
||||
let mut digits = String::new();
|
||||
let mut filtered = String::new();
|
||||
let mut exp = 0;
|
||||
let mut after_point = false;
|
||||
|
||||
if after_point {
|
||||
exp -= 1;
|
||||
}
|
||||
} else if c == decimal_point {
|
||||
after_point = true;
|
||||
} else if !matches!(
|
||||
c,
|
||||
'\u{200b}'
|
||||
| '\u{202b}'
|
||||
| '\u{202c}'
|
||||
| '\u{202e}'
|
||||
| '\u{200e}'
|
||||
| '\u{200f}'
|
||||
| '.'
|
||||
| ','
|
||||
) {
|
||||
filtered.push(c);
|
||||
for c in string.chars() {
|
||||
if c.is_ascii_digit() {
|
||||
digits.push(c);
|
||||
|
||||
if after_point {
|
||||
exp -= 1;
|
||||
}
|
||||
} else if c == decimal_point {
|
||||
after_point = true;
|
||||
} else if !matches!(
|
||||
c,
|
||||
'\u{200b}' | '\u{202b}' | '\u{202c}' | '\u{202e}' | '\u{200e}' | '\u{200f}' | '.' | ','
|
||||
) {
|
||||
c.to_lowercase().for_each(|c| filtered.push(c));
|
||||
}
|
||||
if buf.is_empty() {
|
||||
// TODO: integrate into dictionary
|
||||
if lang == Language::Ar && string.contains("واحد")
|
||||
|| lang == Language::Iw && string.contains("אחד")
|
||||
|| lang == Language::As && string.contains('১') // ১টা
|
||||
|| lang == Language::Bn && string.contains('১')
|
||||
|| lang == Language::Fa && string.contains('۱')
|
||||
|| lang == Language::Is && (string.contains("Eitt ") || string.contains("Einn "))
|
||||
|| lang == Language::My && string.contains('၁')
|
||||
|| lang == Language::No && string.contains("Én ")
|
||||
|| lang == Language::Pt && string.contains("Um ")
|
||||
|| lang == Language::Ro && string.contains("Un ")
|
||||
{
|
||||
return 1.try_into().ok();
|
||||
}
|
||||
|
||||
return None;
|
||||
} else {
|
||||
(buf.parse::<u64>().ok()?, exp, filtered)
|
||||
}
|
||||
};
|
||||
|
||||
let lookup_token = |token: &str| match token {
|
||||
"K" | "k" => Some(3),
|
||||
_ => dict_entry.number_tokens.get(token).map(|t| *t as i32),
|
||||
};
|
||||
|
||||
if lang_by_char(lang) || lang == Language::Ko {
|
||||
exp += filtered
|
||||
.chars()
|
||||
.filter_map(|token| lookup_token(&token.to_string()))
|
||||
.sum::<i32>();
|
||||
} else {
|
||||
exp += filtered
|
||||
.split_whitespace()
|
||||
.filter_map(lookup_token)
|
||||
.sum::<i32>();
|
||||
}
|
||||
|
||||
F::try_from(some_or_bail!(
|
||||
num.checked_mul(some_or_bail!(
|
||||
(10_u64).checked_pow(ok_or_bail!(exp.try_into(), None)),
|
||||
if digits.is_empty() {
|
||||
if by_char {
|
||||
filtered
|
||||
.chars()
|
||||
.find_map(|c| dict_entry.number_nd_tokens.get(&c.to_string()))
|
||||
.and_then(|n| (*n as u64).try_into().ok())
|
||||
} else {
|
||||
filtered
|
||||
.split_whitespace()
|
||||
.find_map(|token| dict_entry.number_nd_tokens.get(token))
|
||||
.and_then(|n| (*n as u64).try_into().ok())
|
||||
}
|
||||
} else {
|
||||
let num = digits.parse::<u64>().ok()?;
|
||||
|
||||
let lookup_token = |token: &str| match token {
|
||||
"k" => Some(3),
|
||||
_ => dict_entry.number_tokens.get(token).map(|t| *t as i32),
|
||||
};
|
||||
|
||||
if by_char {
|
||||
exp += filtered
|
||||
.chars()
|
||||
.filter_map(|token| lookup_token(&token.to_string()))
|
||||
.sum::<i32>();
|
||||
} else {
|
||||
exp += filtered
|
||||
.split_whitespace()
|
||||
.filter_map(lookup_token)
|
||||
.sum::<i32>();
|
||||
}
|
||||
|
||||
F::try_from(some_or_bail!(
|
||||
num.checked_mul(some_or_bail!(
|
||||
(10_u64).checked_pow(ok_or_bail!(exp.try_into(), None)),
|
||||
None
|
||||
)),
|
||||
None
|
||||
)),
|
||||
None
|
||||
))
|
||||
.ok()
|
||||
))
|
||||
.ok()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_large_numstr_or_warn<F>(
|
||||
|
|
@ -516,9 +512,10 @@ pub(crate) mod tests {
|
|||
#[case(
|
||||
Language::Iw,
|
||||
"\u{200f}\u{202b}3.36M\u{200f}\u{202c}\u{200f} \u{200f}מנויים\u{200f}",
|
||||
3360000
|
||||
3_360_000
|
||||
)]
|
||||
fn t_parse_large_numstr_1(#[case] lang: Language, #[case] string: &str, #[case] expect: u64) {
|
||||
#[case(Language::As, "১ জন গ্ৰাহক", 1)]
|
||||
fn t_parse_large_numstr(#[case] lang: Language, #[case] string: &str, #[case] expect: u64) {
|
||||
let res = parse_large_numstr::<u64>(string, lang).unwrap();
|
||||
assert_eq!(res, expect);
|
||||
}
|
||||
|
|
@ -527,20 +524,6 @@ pub(crate) mod tests {
|
|||
fn t_parse_large_numstr_samples() {
|
||||
let json_path = path!(*TESTFILES / "dict" / "large_number_samples.json");
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
let number_samples: BTreeMap<Language, BTreeMap<String, (String, u64)>> =
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
||||
|
||||
number_samples.iter().for_each(|(lang, entry)| {
|
||||
entry.iter().for_each(|(_, (txt, expect))| {
|
||||
testcase_parse_large_numstr(txt, *lang, *expect);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn t_parse_large_numstr_samples2() {
|
||||
let json_path = path!(*TESTFILES / "dict" / "large_number_samples_all.json");
|
||||
let json_file = File::open(json_path).unwrap();
|
||||
let number_samples: BTreeMap<Language, BTreeMap<String, u64>> =
|
||||
serde_json::from_reader(BufReader::new(json_file)).unwrap();
|
||||
|
||||
|
|
@ -565,8 +548,9 @@ pub(crate) mod tests {
|
|||
}
|
||||
};
|
||||
|
||||
// TODO: add support for zero values
|
||||
let res = parse_large_numstr::<u64>(string, lang).unwrap_or_default();
|
||||
assert_eq!(res, rounded, "{string} (lang: {lang}, exact: {expect})");
|
||||
let emsg = format!("{string} (lang: {lang}, exact: {expect})");
|
||||
|
||||
let res = parse_large_numstr::<u64>(string, lang).expect(&emsg);
|
||||
assert_eq!(res, rounded, "{emsg}");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,9 @@
|
|||
"m": 6,
|
||||
"mjd": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nie": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"drama": "Show",
|
||||
|
|
@ -93,6 +96,9 @@
|
|||
"ሺ": 3,
|
||||
"ቢ": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"የለዉም": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ትዕይንት": "Show",
|
||||
"ነጠላ": "Single",
|
||||
|
|
@ -143,6 +149,10 @@
|
|||
"مليار": 9,
|
||||
"مليون": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"لا": 0,
|
||||
"واحد": 1
|
||||
},
|
||||
"album_types": {
|
||||
"أغنية منفردة": "Single",
|
||||
"ألبوم": "Album",
|
||||
|
|
@ -172,6 +182,7 @@
|
|||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"কোঃটা": 9,
|
||||
"নিঃ": 6,
|
||||
"নিঃটা": 6,
|
||||
"নিযুত": 6,
|
||||
"নিযুতটা": 6,
|
||||
|
|
@ -180,8 +191,11 @@
|
|||
"লাখটা": 5,
|
||||
"হা": 3,
|
||||
"হাজাৰ": 3,
|
||||
"হাজাৰটা": 3,
|
||||
"নিঃ": 6
|
||||
"হাজাৰটা": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"নাই": 0,
|
||||
"১": 1
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
|
|
@ -229,6 +243,9 @@
|
|||
"mln": 6,
|
||||
"mlrd": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"yoxdur": 0
|
||||
},
|
||||
"album_types": {
|
||||
"albom": "Album",
|
||||
"audio kitab": "Audiobook",
|
||||
|
|
@ -291,6 +308,9 @@
|
|||
"млрд": 9,
|
||||
"тыс": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"няма": 0
|
||||
},
|
||||
"album_types": {
|
||||
"альбом": "Album",
|
||||
"аўдыякніга": "Audiobook",
|
||||
|
|
@ -330,6 +350,9 @@
|
|||
"млрд": 9,
|
||||
"хил": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"няма": 0
|
||||
},
|
||||
"album_types": {
|
||||
"албум": "Album",
|
||||
"аудиокнига": "Audiobook",
|
||||
|
|
@ -379,6 +402,10 @@
|
|||
"হা": 3,
|
||||
"হাটি": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"০": 0,
|
||||
"১": 1
|
||||
},
|
||||
"album_types": {
|
||||
"অডিওবুক": "Audiobook",
|
||||
"অ্যালবাম": "Album",
|
||||
|
|
@ -437,6 +464,9 @@
|
|||
"mil": 6,
|
||||
"mlr": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nema": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"audio knjiga": "Audiobook",
|
||||
|
|
@ -485,9 +515,11 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"M": 6,
|
||||
"m": 3,
|
||||
"kM": 9
|
||||
"km": 9,
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"sense": 0
|
||||
},
|
||||
"album_types": {
|
||||
"audiollibre": "Audiobook",
|
||||
|
|
@ -532,6 +564,7 @@
|
|||
"mld": 9,
|
||||
"tis": 3
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"audiokniha": "Audiobook",
|
||||
|
|
@ -582,6 +615,9 @@
|
|||
"mia": 9,
|
||||
"mio": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ingen": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"ep": "Ep",
|
||||
|
|
@ -617,8 +653,11 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"Mio": 6,
|
||||
"Mrd": 9
|
||||
"mio": 6,
|
||||
"mrd": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"keine": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
|
|
@ -672,6 +711,9 @@
|
|||
"εκ": 6,
|
||||
"χιλ": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"καμία": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"single": "Single",
|
||||
|
|
@ -681,7 +723,10 @@
|
|||
}
|
||||
},
|
||||
"en": {
|
||||
"equivalent": ["en-GB", "en-IN"],
|
||||
"equivalent": [
|
||||
"en-GB",
|
||||
"en-IN"
|
||||
],
|
||||
"by_char": false,
|
||||
"timeago_tokens": {
|
||||
"day": "D",
|
||||
|
|
@ -721,10 +766,13 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"B": 9,
|
||||
"M": 6,
|
||||
"b": 9,
|
||||
"crore": 7,
|
||||
"lakh": 5
|
||||
"lakh": 5,
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"no": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
|
|
@ -774,9 +822,10 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"M": 6,
|
||||
"m": 6,
|
||||
"mil": 3
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"audiodrama": "Show",
|
||||
"audiolibro": "Audiobook",
|
||||
|
|
@ -786,7 +835,9 @@
|
|||
}
|
||||
},
|
||||
"es-US": {
|
||||
"equivalent": ["es-419"],
|
||||
"equivalent": [
|
||||
"es-419"
|
||||
],
|
||||
"by_char": false,
|
||||
"timeago_tokens": {
|
||||
"año": "Y",
|
||||
|
|
@ -825,9 +876,12 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"M": 6,
|
||||
"m": 6,
|
||||
"mil": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"sin": 0
|
||||
},
|
||||
"album_types": {
|
||||
"audiolibro": "Audiobook",
|
||||
"ep": "Ep",
|
||||
|
|
@ -882,6 +936,9 @@
|
|||
"mln": 6,
|
||||
"tuh": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"pole": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"audioraamat": "Audiobook",
|
||||
|
|
@ -926,7 +983,10 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"M": 6
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ez": 0
|
||||
},
|
||||
"album_types": {
|
||||
"albuma": "Album",
|
||||
|
|
@ -973,6 +1033,10 @@
|
|||
"میلیون": 6,
|
||||
"هزار": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"بدون": 0,
|
||||
"۱": 1
|
||||
},
|
||||
"album_types": {
|
||||
"آلبوم": "Album",
|
||||
"تک آهنگ": "Single",
|
||||
|
|
@ -1012,6 +1076,10 @@
|
|||
"mrd": 9,
|
||||
"t": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ei": 0,
|
||||
"katselukertoja": 0
|
||||
},
|
||||
"album_types": {
|
||||
"albumi": "Album",
|
||||
"ep": "Ep",
|
||||
|
|
@ -1053,8 +1121,11 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"B": 9,
|
||||
"M": 6
|
||||
"b": 9,
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"walang": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
|
|
@ -1065,7 +1136,9 @@
|
|||
}
|
||||
},
|
||||
"fr": {
|
||||
"equivalent": ["fr-CA"],
|
||||
"equivalent": [
|
||||
"fr-CA"
|
||||
],
|
||||
"by_char": false,
|
||||
"timeago_tokens": {
|
||||
"an": "Y",
|
||||
|
|
@ -1104,9 +1177,13 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"G": 9,
|
||||
"M": 6,
|
||||
"Md": 9
|
||||
"g": 9,
|
||||
"m": 6,
|
||||
"md": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"aucun": 0,
|
||||
"aucune": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
|
|
@ -1158,7 +1235,10 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"M": 6
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ningunha": 0
|
||||
},
|
||||
"album_types": {
|
||||
"audiolibro": "Audiobook",
|
||||
|
|
@ -1206,6 +1286,7 @@
|
|||
"લાખ": 5,
|
||||
"હજાર": 3
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"આલ્બમ": "Album",
|
||||
|
|
@ -1252,6 +1333,9 @@
|
|||
"लाख": 5,
|
||||
"हज़ार": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"नहीं": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ईपी": "Ep",
|
||||
"एल्बम": "Album",
|
||||
|
|
@ -1310,6 +1394,9 @@
|
|||
"mlr": 9,
|
||||
"tis": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nema": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"audioknjiga": "Audiobook",
|
||||
|
|
@ -1360,9 +1447,12 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"E": 3,
|
||||
"M": 6,
|
||||
"Mrd": 9
|
||||
"e": 3,
|
||||
"m": 6,
|
||||
"mrd": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nincs": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
|
|
@ -1409,6 +1499,10 @@
|
|||
"մլն": 6,
|
||||
"մլրդ": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"դիտումներ": 0,
|
||||
"չկան": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"ալբոմ": "Album",
|
||||
|
|
@ -1450,10 +1544,13 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"M": 9,
|
||||
"jt": 6,
|
||||
"m": 9,
|
||||
"rb": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"belum": 0
|
||||
},
|
||||
"album_types": {
|
||||
"acara": "Show",
|
||||
"album": "Album",
|
||||
|
|
@ -1509,6 +1606,10 @@
|
|||
"ma": 9,
|
||||
"þ": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"einn": 1,
|
||||
"ekkert": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"hljóðbók": "Audiobook",
|
||||
|
|
@ -1557,8 +1658,11 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"Mln": 6,
|
||||
"Mrd": 9
|
||||
"mln": 6,
|
||||
"mrd": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nessuna": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
|
|
@ -1615,9 +1719,12 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"B": 9,
|
||||
"K": 3,
|
||||
"M": 6
|
||||
"b": 9,
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"אחד": 1,
|
||||
"אין": 0
|
||||
},
|
||||
"album_types": {
|
||||
"אלבום": "Album",
|
||||
|
|
@ -1650,6 +1757,7 @@
|
|||
"万": 4,
|
||||
"億": 8
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"アルバム": "Album",
|
||||
|
|
@ -1697,6 +1805,9 @@
|
|||
"მლნ": 6,
|
||||
"მლრდ": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"არ": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ალბომი": "Album",
|
||||
"აუდიოწიგნი": "Audiobook",
|
||||
|
|
@ -1743,6 +1854,9 @@
|
|||
"млрд": 9,
|
||||
"мың": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ешкім": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"альбом": "Album",
|
||||
|
|
@ -1790,6 +1904,7 @@
|
|||
"ពាន់": 3,
|
||||
"លាន": 6
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"កម្មវិធីទូរទស្សន៍": "Show",
|
||||
|
|
@ -1843,6 +1958,9 @@
|
|||
"ಕೋಟಿ": 7,
|
||||
"ಲಕ್ಷ": 5
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ವೀಕ್ಷಣೆಗಳಿಲ್ಲ": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"ಆಡಿಯೋಬುಕ್": "Audiobook",
|
||||
|
|
@ -1866,8 +1984,8 @@
|
|||
"date_order": "YMD",
|
||||
"months": {},
|
||||
"timeago_nd_tokens": {
|
||||
"오늘": "0D",
|
||||
"어제": "1D"
|
||||
"어제": "1D",
|
||||
"오늘": "0D"
|
||||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
|
|
@ -1875,6 +1993,9 @@
|
|||
"억": 8,
|
||||
"천": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"없": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"싱글": "Single",
|
||||
|
|
@ -1920,6 +2041,9 @@
|
|||
"млд": 9,
|
||||
"млн": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"эч": 0
|
||||
},
|
||||
"album_types": {
|
||||
"альбом": "Album",
|
||||
"аудиокитеп": "Audiobook",
|
||||
|
|
@ -1968,6 +2092,9 @@
|
|||
"ພັນ": 3,
|
||||
"ລ້ານ": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ຍັງບໍ່ມີຄົນເບິ່ງເທື່ອ": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"ຊິງເກິນ": "Single",
|
||||
|
|
@ -2017,6 +2144,9 @@
|
|||
"mlrd": 9,
|
||||
"tūkst": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nėra": 0
|
||||
},
|
||||
"album_types": {
|
||||
"albumas": "Album",
|
||||
"garsinė knyga": "Audiobook",
|
||||
|
|
@ -2069,6 +2199,9 @@
|
|||
"mljrd": 9,
|
||||
"tūkst": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nav": 0
|
||||
},
|
||||
"album_types": {
|
||||
"albums": "Album",
|
||||
"audiogrāmata": "Audiobook",
|
||||
|
|
@ -2104,11 +2237,14 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"М": 6,
|
||||
"илј": 3,
|
||||
"м": 6,
|
||||
"мил": 6,
|
||||
"милј": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"нема": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"албум": "Album",
|
||||
|
|
@ -2153,6 +2289,9 @@
|
|||
"കോടി": 7,
|
||||
"ലക്ഷം": 5
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ഇല്ല": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"ആല്ബം": "Album",
|
||||
|
|
@ -2187,6 +2326,9 @@
|
|||
"сая": 6,
|
||||
"тэрбум": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"үзэлтгүй": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"аудио ном": "Audiobook",
|
||||
|
|
@ -2243,6 +2385,9 @@
|
|||
"लाख": 5,
|
||||
"ह": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"नाहीत": 0
|
||||
},
|
||||
"album_types": {
|
||||
"अल्बम": "Album",
|
||||
"ऑडिओबुक": "Audiobook",
|
||||
|
|
@ -2284,8 +2429,11 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"B": 9,
|
||||
"J": 6
|
||||
"b": 9,
|
||||
"j": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"tiada": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
|
|
@ -2330,11 +2478,15 @@
|
|||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"ကုဋေ": 7,
|
||||
"ထ": 3,
|
||||
"ထောင်": 3,
|
||||
"သန်း": 6,
|
||||
"သိန်း": 5,
|
||||
"သောင်း": 4,
|
||||
"ထ": 3
|
||||
"သောင်း": 4
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"မရှိ": 0,
|
||||
"၁": 1
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
|
|
@ -2350,12 +2502,12 @@
|
|||
"timeago_tokens": {
|
||||
"घण्टा": "h",
|
||||
"दिन": "D",
|
||||
"दिनअघि": "D",
|
||||
"महिना": "M",
|
||||
"मिनेट": "m",
|
||||
"वर्ष": "Y",
|
||||
"सेकेन्ड": "s",
|
||||
"हप्ता": "W",
|
||||
"दिनअघि": "D"
|
||||
"हप्ता": "W"
|
||||
},
|
||||
"date_order": "YD",
|
||||
"months": {
|
||||
|
|
@ -2383,6 +2535,9 @@
|
|||
"लाख": 5,
|
||||
"हजार": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"छैन": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"अडियोबुक": "Audiobook",
|
||||
|
|
@ -2432,6 +2587,9 @@
|
|||
"mld": 9,
|
||||
"mln": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"geen": 0
|
||||
},
|
||||
"album_types": {
|
||||
"aflevering": "Show",
|
||||
"album": "Album",
|
||||
|
|
@ -2483,6 +2641,9 @@
|
|||
"mill": 6,
|
||||
"mrd": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ingen": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"ep": "Ep",
|
||||
|
|
@ -2525,15 +2686,18 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"ନି": 6,
|
||||
"ନିଜଣ": 6,
|
||||
"ନିଟି": 6,
|
||||
"ବି": 9,
|
||||
"ବିଜଣ": 9,
|
||||
"ବିଟି": 9,
|
||||
"ହଜଣ": 3,
|
||||
"ହଟି": 3,
|
||||
"ହ": 3,
|
||||
"ନି": 6,
|
||||
"ବି": 9
|
||||
"ହଜଣ": 3,
|
||||
"ହଟି": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ନାହିଁ": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
|
|
@ -2584,6 +2748,9 @@
|
|||
"ਲੱਖ": 5,
|
||||
"ਹਜ਼ਾਰ": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ਨਹੀਂ": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"ਆਡੀਓ-ਕਿਤਾਬ": "Audiobook",
|
||||
|
|
@ -2643,6 +2810,9 @@
|
|||
"mln": 6,
|
||||
"tys": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"brak": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"audiobook": "Audiobook",
|
||||
|
|
@ -2695,6 +2865,7 @@
|
|||
"mi": 6,
|
||||
"mil": 3
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"audiolivro": "Audiobook",
|
||||
"ep": "Ep",
|
||||
|
|
@ -2730,10 +2901,11 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"M": 6,
|
||||
"mM": 9,
|
||||
"mil": 3
|
||||
"m": 6,
|
||||
"mil": 3,
|
||||
"mm": 9
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"livro áudio": "Audiobook",
|
||||
|
|
@ -2785,6 +2957,10 @@
|
|||
"mil": 6,
|
||||
"mld": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nicio": 0,
|
||||
"un": 1
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"carte audio": "Audiobook",
|
||||
|
|
@ -2843,6 +3019,7 @@
|
|||
"млрд": 9,
|
||||
"тыс": 3
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"альбом": "Album",
|
||||
|
|
@ -2888,6 +3065,9 @@
|
|||
"බි": 9,
|
||||
"මි": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"නැත": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ඇල්බමය": "Album",
|
||||
"තනි": "Single",
|
||||
|
|
@ -2930,6 +3110,9 @@
|
|||
"mld": 9,
|
||||
"tis": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"žiadne": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"audiokniha": "Audiobook",
|
||||
|
|
@ -2993,6 +3176,9 @@
|
|||
"mrd": 9,
|
||||
"tis": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"brez": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"ep": "Ep",
|
||||
|
|
@ -3041,6 +3227,9 @@
|
|||
"mld": 9,
|
||||
"mln": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nuk": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"ep": "Ep",
|
||||
|
|
@ -3084,6 +3273,9 @@
|
|||
"млрд": 9,
|
||||
"хиљ": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"нема": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"албум": "Album",
|
||||
|
|
@ -3128,6 +3320,9 @@
|
|||
"mil": 6,
|
||||
"mlrd": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"nema": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"audio-knjiga": "Audiobook",
|
||||
|
|
@ -3178,6 +3373,9 @@
|
|||
"md": 9,
|
||||
"mn": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"inga": 0
|
||||
},
|
||||
"album_types": {
|
||||
"album": "Album",
|
||||
"ep": "Ep",
|
||||
|
|
@ -3221,9 +3419,12 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"B": 9,
|
||||
"M": 6,
|
||||
"elfu": 3
|
||||
"b": 9,
|
||||
"elfu": 3,
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"haijatazamwa": 0
|
||||
},
|
||||
"album_types": {
|
||||
"albamu": "Album",
|
||||
|
|
@ -3278,6 +3479,9 @@
|
|||
"கோடி": 7,
|
||||
"லட்சம்": 5
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"இல்லை": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"ஆடியோ புத்தகம்": "Audiobook",
|
||||
|
|
@ -3331,6 +3535,9 @@
|
|||
"లక్ష": 5,
|
||||
"లక్షలు": 5
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"లేవు": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"ఆడియోబుక్": "Audiobook",
|
||||
|
|
@ -3348,11 +3555,11 @@
|
|||
"นาทีที่ผ่านมา": "m",
|
||||
"ปีที่แล้ว": "Y",
|
||||
"วันที่ผ่านมา": "D",
|
||||
"วันที่แล้ว": "D",
|
||||
"วินาที": "s",
|
||||
"วินาทีที่ผ่านมา": "s",
|
||||
"สัปดาห์ที่ผ่านมา": "W",
|
||||
"เดือนที่ผ่านมา": "M",
|
||||
"วันที่แล้ว": "D"
|
||||
"เดือนที่ผ่านมา": "M"
|
||||
},
|
||||
"date_order": "DY",
|
||||
"months": {
|
||||
|
|
@ -3382,6 +3589,9 @@
|
|||
"หมื่นล้าน": 10,
|
||||
"แสน": 5
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"ไม่มีการดู": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"ซิงเกิล": "Single",
|
||||
|
|
@ -3423,9 +3633,12 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"B": 3,
|
||||
"Mn": 6,
|
||||
"Mr": 9
|
||||
"b": 3,
|
||||
"mn": 6,
|
||||
"mr": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"yok": 0
|
||||
},
|
||||
"album_types": {
|
||||
"albüm": "Album",
|
||||
|
|
@ -3485,6 +3698,9 @@
|
|||
"млрд": 9,
|
||||
"тис": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"жодного": 0
|
||||
},
|
||||
"album_types": {
|
||||
"альбом": "Album",
|
||||
"аудіодрама": "Show",
|
||||
|
|
@ -3537,6 +3753,9 @@
|
|||
"کروڑ": 7,
|
||||
"ہزار": 3
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"نہیں": 0
|
||||
},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"آڈیو بک": "Audiobook",
|
||||
|
|
@ -3582,6 +3801,7 @@
|
|||
"mln": 6,
|
||||
"mlrd": 9
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"albom": "Album",
|
||||
"audiokitob": "Audiobook",
|
||||
|
|
@ -3611,10 +3831,11 @@
|
|||
},
|
||||
"comma_decimal": true,
|
||||
"number_tokens": {
|
||||
"N": 3,
|
||||
"T": 9,
|
||||
"Tr": 6
|
||||
"n": 3,
|
||||
"t": 9,
|
||||
"tr": 6
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"chương trình": "Show",
|
||||
"sách nói": "Audiobook",
|
||||
|
|
@ -3646,6 +3867,9 @@
|
|||
"万": 4,
|
||||
"亿": 8
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"无": 0
|
||||
},
|
||||
"album_types": {
|
||||
"专辑": "Album",
|
||||
"单曲": "Single",
|
||||
|
|
@ -3675,9 +3899,10 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"B": 9,
|
||||
"M": 6
|
||||
"b": 9,
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"單曲": "Single",
|
||||
|
|
@ -3709,6 +3934,7 @@
|
|||
"億": 8,
|
||||
"萬": 4
|
||||
},
|
||||
"number_nd_tokens": {},
|
||||
"album_types": {
|
||||
"ep": "Ep",
|
||||
"單曲": "Single",
|
||||
|
|
@ -3757,8 +3983,11 @@
|
|||
},
|
||||
"comma_decimal": false,
|
||||
"number_tokens": {
|
||||
"B": 9,
|
||||
"M": 6
|
||||
"b": 9,
|
||||
"m": 6
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"akukho": 0
|
||||
},
|
||||
"album_types": {
|
||||
"bonisa": "Show",
|
||||
|
|
|
|||
163
testfiles/dict/dictionary_override.json
Normal file
163
testfiles/dict/dictionary_override.json
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
{
|
||||
"af": {
|
||||
"number_nd_tokens": {
|
||||
"geen": null
|
||||
}
|
||||
},
|
||||
"am": {
|
||||
"number_nd_tokens": {
|
||||
"ምንም": null
|
||||
}
|
||||
},
|
||||
"as": {
|
||||
"number_tokens": {
|
||||
"লা": 5,
|
||||
"হা": 3,
|
||||
"শঃ": null
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"কোনো": null
|
||||
}
|
||||
},
|
||||
"bn": {
|
||||
"number_tokens": {
|
||||
"কোটি": 7,
|
||||
"শত": 2
|
||||
}
|
||||
},
|
||||
"es": {
|
||||
"number_tokens": {
|
||||
"m": 6,
|
||||
"mil": 3
|
||||
}
|
||||
},
|
||||
"es-US": {
|
||||
"number_tokens": {
|
||||
"m": 6,
|
||||
"mil": 3
|
||||
}
|
||||
},
|
||||
"et": {
|
||||
"number_nd_tokens": {
|
||||
"vaatamisi": null
|
||||
}
|
||||
},
|
||||
"eu": {
|
||||
"number_nd_tokens": {
|
||||
"dago": null,
|
||||
"ikustaldirik": null
|
||||
}
|
||||
},
|
||||
"fr": {
|
||||
"number_tokens": {
|
||||
"d’abonnés": null
|
||||
}
|
||||
},
|
||||
"hy": {
|
||||
"number_nd_tokens": {
|
||||
"Դիտումներ": null
|
||||
}
|
||||
},
|
||||
"is": {
|
||||
"number_nd_tokens": {
|
||||
"áskrifandi": null,
|
||||
"enn": null
|
||||
}
|
||||
},
|
||||
"iw": {
|
||||
"number_nd_tokens": {
|
||||
"מנוי": null
|
||||
}
|
||||
},
|
||||
"ka": {
|
||||
"number_nd_tokens": {
|
||||
"არის": null,
|
||||
"ნახვები": null
|
||||
}
|
||||
},
|
||||
"kk": {
|
||||
"number_nd_tokens": {
|
||||
"көрмеген": null
|
||||
}
|
||||
},
|
||||
"kn": {
|
||||
"number_nd_tokens": {
|
||||
"ಯಾವುದೇ": null
|
||||
}
|
||||
},
|
||||
"ko": {
|
||||
"number_nd_tokens": {
|
||||
"음": null
|
||||
}
|
||||
},
|
||||
"ky": {
|
||||
"number_nd_tokens": {
|
||||
"ким": null,
|
||||
"көрө": null,
|
||||
"элек": null
|
||||
}
|
||||
},
|
||||
"my": {
|
||||
"number_tokens": {
|
||||
"ကုဋေ": 7,
|
||||
"သောင်း": 4,
|
||||
"ထ": 3
|
||||
}
|
||||
},
|
||||
"ne": {
|
||||
"number_nd_tokens": {
|
||||
"कुनै": null
|
||||
}
|
||||
},
|
||||
"no": {
|
||||
"number_nd_tokens": {
|
||||
"avspillinger": null
|
||||
}
|
||||
},
|
||||
"or": {
|
||||
"number_tokens": {
|
||||
"ବିଜଣ": 9,
|
||||
"ବି": 9
|
||||
},
|
||||
"number_nd_tokens": {
|
||||
"କୌଣସି": null
|
||||
}
|
||||
},
|
||||
"pa": {
|
||||
"number_nd_tokens": {
|
||||
"ਕਿਸੇ": null,
|
||||
"ਨੇ": null
|
||||
}
|
||||
},
|
||||
"ro": {
|
||||
"number_nd_tokens": {
|
||||
"abonat": null,
|
||||
"vizionare": null
|
||||
}
|
||||
},
|
||||
"sq": {
|
||||
"number_nd_tokens": {
|
||||
"ka": null
|
||||
}
|
||||
},
|
||||
"uk": {
|
||||
"number_nd_tokens": {
|
||||
"перегляду": null
|
||||
}
|
||||
},
|
||||
"ur": {
|
||||
"number_nd_tokens": {
|
||||
"کوئی": null
|
||||
}
|
||||
},
|
||||
"zh-CN": {
|
||||
"number_nd_tokens": {
|
||||
"人": null
|
||||
}
|
||||
},
|
||||
"zu": {
|
||||
"number_nd_tokens": {
|
||||
"kubukwa": null
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Reference in a new issue