feat: implemented large number parser
This commit is contained in:
parent
f66dce9d3d
commit
0659a64361
4 changed files with 79 additions and 44 deletions
|
|
@ -2,7 +2,11 @@ use anyhow::{bail, Result};
|
|||
use reqwest::Method;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{model::ChannelVideos, serializer::MapResult};
|
||||
use crate::{
|
||||
model::{ChannelVideos, Paginator},
|
||||
serializer::MapResult,
|
||||
util,
|
||||
};
|
||||
|
||||
use super::{response, ClientType, MapResponse, RustyPipeQuery, YTContext};
|
||||
|
||||
|
|
@ -53,7 +57,7 @@ impl MapResponse<ChannelVideos> for response::Channel {
|
|||
fn map_response(
|
||||
self,
|
||||
id: &str,
|
||||
_lang: crate::model::Language,
|
||||
lang: crate::model::Language,
|
||||
_deobf: Option<&crate::deobfuscate::Deobfuscator>,
|
||||
) -> Result<MapResult<ChannelVideos>> {
|
||||
let warnings = Vec::new();
|
||||
|
|
@ -72,6 +76,10 @@ impl MapResponse<ChannelVideos> for response::Channel {
|
|||
c: ChannelVideos {
|
||||
id: header.channel_id,
|
||||
name: header.title,
|
||||
subscriber_count: header
|
||||
.subscriber_count_text
|
||||
.and_then(|txt| util::parse_large_numstr(&txt, lang)),
|
||||
videos: Paginator::default(),
|
||||
},
|
||||
warnings,
|
||||
})
|
||||
|
|
|
|||
|
|
@ -177,14 +177,12 @@ impl MapResponse<VideoDetails> for response::VideoDetails {
|
|||
_ => bail!("could not find primary_info"),
|
||||
};
|
||||
|
||||
/*
|
||||
TODO: use large number parser for this
|
||||
let comment_count = comment_count_section.and_then(|s| {
|
||||
util::parse_numeric_or_warn::<u32>(
|
||||
util::parse_large_numstr::<u32>(
|
||||
&s.comments_entry_point_header_renderer.comment_count,
|
||||
&mut warnings,
|
||||
lang,
|
||||
)
|
||||
});*/
|
||||
});
|
||||
|
||||
let comment_ctoken = comment_ctoken_section.map(|s| {
|
||||
s.continuation_item_renderer
|
||||
|
|
@ -301,8 +299,9 @@ impl MapResponse<VideoDetails> for response::VideoDetails {
|
|||
name: channel_name,
|
||||
avatar: owner.thumbnail.into(),
|
||||
verification: owner.badges.into(),
|
||||
subscriber_count: None,
|
||||
subscriber_count_txt: owner.subscriber_count_text,
|
||||
subscriber_count: owner
|
||||
.subscriber_count_text
|
||||
.and_then(|txt| util::parse_large_numstr(&txt, lang)),
|
||||
},
|
||||
view_count,
|
||||
like_count,
|
||||
|
|
@ -312,8 +311,8 @@ impl MapResponse<VideoDetails> for response::VideoDetails {
|
|||
is_ccommons,
|
||||
chapters,
|
||||
recommended,
|
||||
top_comments: Paginator::new(None, Vec::new(), comment_ctoken),
|
||||
latest_comments: Paginator::new(None, Vec::new(), latest_comments_ctoken),
|
||||
top_comments: Paginator::new(comment_count, Vec::new(), comment_ctoken),
|
||||
latest_comments: Paginator::new(comment_count, Vec::new(), latest_comments_ctoken),
|
||||
},
|
||||
warnings,
|
||||
})
|
||||
|
|
@ -431,7 +430,6 @@ fn map_recommendations(
|
|||
avatar: video.channel_thumbnail.into(),
|
||||
verification: video.owner_badges.into(),
|
||||
subscriber_count: None,
|
||||
subscriber_count_txt: None,
|
||||
},
|
||||
publish_date: video.published_time_text.as_ref().and_then(|txt| {
|
||||
timeago::parse_timeago_or_warn(lang, txt, &mut warnings)
|
||||
|
|
@ -514,7 +512,6 @@ fn map_comment(
|
|||
.map(|b| b.author_comment_badge_renderer.icon.into())
|
||||
.unwrap_or_default(),
|
||||
subscriber_count: None,
|
||||
subscriber_count_txt: None,
|
||||
}),
|
||||
_ => None,
|
||||
},
|
||||
|
|
@ -620,7 +617,11 @@ mod tests {
|
|||
assert_eq!(details.channel.name, "SMTOWN");
|
||||
assert!(!details.channel.avatar.is_empty(), "no channel avatars");
|
||||
assert_eq!(details.channel.verification, Verification::Verified);
|
||||
// TODO: assert!(details.channel.subscriber_count.unwrap() > 30000000, "expected >30M subs, got {}", details.channel.subscriber_count);
|
||||
assert!(
|
||||
details.channel.subscriber_count.unwrap() > 30000000,
|
||||
"expected >30M subs, got {}",
|
||||
details.channel.subscriber_count.unwrap()
|
||||
);
|
||||
|
||||
assert!(
|
||||
details.view_count > 232000000,
|
||||
|
|
@ -644,11 +645,11 @@ mod tests {
|
|||
assert!(!details.recommended.items.is_empty());
|
||||
assert!(!details.recommended.is_exhausted());
|
||||
|
||||
// assert!(
|
||||
// details.top_comments.count.unwrap() > 700000,
|
||||
// "expected > 700K comments, got {}",
|
||||
// details.top_comments.count.unwrap()
|
||||
// );
|
||||
assert!(
|
||||
details.top_comments.count.unwrap() > 700000,
|
||||
"expected > 700K comments, got {}",
|
||||
details.top_comments.count.unwrap()
|
||||
);
|
||||
assert!(!details.top_comments.is_exhausted());
|
||||
assert!(!details.latest_comments.is_exhausted());
|
||||
}
|
||||
|
|
@ -671,7 +672,11 @@ mod tests {
|
|||
assert_eq!(details.channel.name, "Sentamusic");
|
||||
assert!(!details.channel.avatar.is_empty(), "no channel avatars");
|
||||
assert_eq!(details.channel.verification, Verification::Artist);
|
||||
// TODO: assert!(details.channel.subscriber_count.unwrap() > 33000, "expected >33K subs, got {}", details.channel.subscriber_count);
|
||||
assert!(
|
||||
details.channel.subscriber_count.unwrap() > 33000,
|
||||
"expected >33K subs, got {}",
|
||||
details.channel.subscriber_count.unwrap()
|
||||
);
|
||||
|
||||
assert!(
|
||||
details.view_count > 20309,
|
||||
|
|
@ -730,7 +735,11 @@ mod tests {
|
|||
assert_eq!(details.channel.name, "media.ccc.de");
|
||||
assert!(!details.channel.avatar.is_empty(), "no channel avatars");
|
||||
assert_eq!(details.channel.verification, Verification::None);
|
||||
// TODO: assert!(details.channel.subscriber_count.unwrap() > 170000, "expected >170K subs, got {}", details.channel.subscriber_count);
|
||||
assert!(
|
||||
details.channel.subscriber_count.unwrap() > 170000,
|
||||
"expected >170K subs, got {}",
|
||||
details.channel.subscriber_count.unwrap()
|
||||
);
|
||||
|
||||
assert!(
|
||||
details.view_count > 2517358,
|
||||
|
|
@ -754,11 +763,11 @@ mod tests {
|
|||
assert!(!details.recommended.items.is_empty());
|
||||
assert!(!details.recommended.is_exhausted());
|
||||
|
||||
// assert!(
|
||||
// details.top_comments.count.unwrap() > 700000,
|
||||
// "expected > 700K comments, got {}",
|
||||
// details.top_comments.count.unwrap()
|
||||
// );
|
||||
assert!(
|
||||
details.top_comments.count.unwrap() > 2199,
|
||||
"expected > 2199 comments, got {}",
|
||||
details.top_comments.count.unwrap()
|
||||
);
|
||||
assert!(!details.top_comments.is_exhausted());
|
||||
assert!(!details.latest_comments.is_exhausted());
|
||||
}
|
||||
|
|
@ -961,7 +970,11 @@ mod tests {
|
|||
assert_eq!(details.channel.name, "Linus Tech Tips");
|
||||
assert!(!details.channel.avatar.is_empty(), "no channel avatars");
|
||||
assert_eq!(details.channel.verification, Verification::Verified);
|
||||
// TODO: assert!(details.channel.subscriber_count.unwrap() > 14700000, "expected >14.7M subs, got {}", details.channel.subscriber_count);
|
||||
assert!(
|
||||
details.channel.subscriber_count.unwrap() > 14700000,
|
||||
"expected >14.7M subs, got {}",
|
||||
details.channel.subscriber_count.unwrap()
|
||||
);
|
||||
|
||||
assert!(
|
||||
details.view_count > 1157262,
|
||||
|
|
@ -1036,11 +1049,11 @@ mod tests {
|
|||
assert!(!details.recommended.items.is_empty());
|
||||
assert!(!details.recommended.is_exhausted());
|
||||
|
||||
// assert!(
|
||||
// details.top_comments.count.unwrap() > 700000,
|
||||
// "expected > 700K comments, got {}",
|
||||
// details.top_comments.count.unwrap()
|
||||
// );
|
||||
assert!(
|
||||
details.top_comments.count.unwrap() > 3199,
|
||||
"expected > 3199 comments, got {}",
|
||||
details.top_comments.count.unwrap()
|
||||
);
|
||||
assert!(!details.top_comments.is_exhausted());
|
||||
assert!(!details.latest_comments.is_exhausted());
|
||||
}
|
||||
|
|
@ -1057,7 +1070,6 @@ mod tests {
|
|||
details.title,
|
||||
"🌎 Nasa Live Stream - Earth From Space : Live Views from the ISS"
|
||||
);
|
||||
// TODO: not full description
|
||||
insta::assert_yaml_snapshot!(details.description, @r###"
|
||||
---
|
||||
- Text: "Live NASA - Views Of Earth from Space\nLive video feed of Earth from the International Space Station (ISS) Cameras\n-----------------------------------------------------------------------------------------------------\nWatch our latest video - The Sun - 4K Video / Solar Flares\n"
|
||||
|
|
@ -1088,7 +1100,11 @@ mod tests {
|
|||
assert_eq!(details.channel.name, "Space Videos");
|
||||
assert!(!details.channel.avatar.is_empty(), "no channel avatars");
|
||||
assert_eq!(details.channel.verification, Verification::Verified);
|
||||
// TODO: assert!(details.channel.subscriber_count.unwrap() > 5500000, "expected >5.5M subs, got {}", details.channel.subscriber_count);
|
||||
assert!(
|
||||
details.channel.subscriber_count.unwrap() > 5500000,
|
||||
"expected >5.5M subs, got {}",
|
||||
details.channel.subscriber_count.unwrap()
|
||||
);
|
||||
|
||||
assert!(
|
||||
details.view_count > 10,
|
||||
|
|
@ -1140,7 +1156,11 @@ mod tests {
|
|||
assert_eq!(details.channel.name, "PrinceOfFALLEN");
|
||||
assert!(!details.channel.avatar.is_empty(), "no channel avatars");
|
||||
assert_eq!(details.channel.verification, Verification::None);
|
||||
// TODO: assert!(details.channel.subscriber_count.unwrap() > 1400, "expected >1400 subs, got {}", details.channel.subscriber_count);
|
||||
assert!(
|
||||
details.channel.subscriber_count.unwrap() > 1400,
|
||||
"expected >1400 subs, got {}",
|
||||
details.channel.subscriber_count.unwrap()
|
||||
);
|
||||
|
||||
assert!(
|
||||
details.view_count > 200,
|
||||
|
|
|
|||
|
|
@ -329,10 +329,10 @@ pub struct Channel {
|
|||
pub verification: Verification,
|
||||
/// Approximate number of subscribers
|
||||
///
|
||||
/// `None` if hidden by the owner or not present.
|
||||
///
|
||||
/// Info: This is only present in the `VideoDetails` response
|
||||
pub subscriber_count: Option<u32>,
|
||||
/// Textual subscriber count (e.g. `1.41M subscribers`, depends on language)
|
||||
pub subscriber_count_txt: Option<String>,
|
||||
pub subscriber_count: Option<u64>,
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -400,14 +400,13 @@ pub struct ChannelVideos {
|
|||
pub id: String,
|
||||
/// Channel name
|
||||
pub name: String,
|
||||
/*
|
||||
/// Channel subscriber count
|
||||
///
|
||||
/// `None` if the subscriber count was hidden by the owner
|
||||
/// or could not be parsed.
|
||||
pub subscriber_count: Option<u64>,
|
||||
/// Videos fetched from the channel
|
||||
pub videos: Paginator<ChannelVideo>,
|
||||
*/
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
|
|
|
|||
16
src/util.rs
16
src/util.rs
|
|
@ -230,7 +230,11 @@ impl<T> TryRemove<T> for Vec<T> {
|
|||
}
|
||||
}
|
||||
|
||||
fn parse_large_numstr(string: &str, lang: Language) -> Option<u64> {
|
||||
/// Parse a large, textual number (e.g. `1.4M subscribers`, `22K views`)
|
||||
pub fn parse_large_numstr<F>(string: &str, lang: Language) -> Option<F>
|
||||
where
|
||||
F: TryFrom<u64>,
|
||||
{
|
||||
let dict_entry = dictionary::entry(lang);
|
||||
let decimal_point = match dict_entry.comma_decimal {
|
||||
true => ',',
|
||||
|
|
@ -275,10 +279,14 @@ fn parse_large_numstr(string: &str, lang: Language) -> Option<u64> {
|
|||
.sum::<i32>();
|
||||
}
|
||||
|
||||
num.checked_mul(some_or_bail!(
|
||||
(10_u64).checked_pow(ok_or_bail!(exp.try_into(), None)),
|
||||
F::try_from(some_or_bail!(
|
||||
num.checked_mul(some_or_bail!(
|
||||
(10_u64).checked_pow(ok_or_bail!(exp.try_into(), None)),
|
||||
None
|
||||
)),
|
||||
None
|
||||
))
|
||||
.ok()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -407,7 +415,7 @@ mod tests {
|
|||
(((expect as f64) / factor as f64).floor() as u64) * factor
|
||||
};
|
||||
|
||||
let res = parse_large_numstr(string, lang).expect(string);
|
||||
let res = parse_large_numstr::<u64>(string, lang).expect(string);
|
||||
assert_eq!(
|
||||
res, rounded,
|
||||
"{} (lang: {}, exact: {})",
|
||||
|
|
|
|||
Reference in a new issue