refactored cache

This commit is contained in:
ThetaDev 2022-09-14 00:04:51 +02:00
parent 925652acdd
commit dda2211e04
8 changed files with 396 additions and 409 deletions

View file

@ -6,20 +6,35 @@ mod response;
use std::fmt::Debug;
use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, bail, Context, Result};
use chrono::{DateTime, Duration, Utc};
use fancy_regex::Regex;
use log::{error, warn};
use once_cell::sync::Lazy;
use rand::Rng;
use reqwest::{header, Client, ClientBuilder, Method, RequestBuilder};
use reqwest::{header, Client, ClientBuilder, Method, Request, RequestBuilder, Response};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use tokio::sync::Mutex;
use crate::{
cache::Cache,
deobfuscate::Deobfuscator,
cache::{CacheStorage, FileStorage},
deobfuscate::{DeobfData, Deobfuscator},
model::{Country, Language},
report::{Level, Report, Reporter, YamlFileReporter},
report::{JsonFileReporter, Level, Report, Reporter},
util,
};
/// Client types for accessing the YouTube API.
///
/// There are multiple clients for accessing the YouTube API which have
/// slightly different features
///
/// - **Desktop**: used by youtube.com
/// - **DesktopMusic**: used by music.youtube.com, can access special music data,
/// cannot access non-music content
/// - **TvHtml5Embed**: (probably) used by Smart TVs, can access age-restricted videos
/// - **Android**: used by the Android app, no obfuscated URLs, includes lower resolution audio streams
/// - **Ios**: used by the iOS app, no obfuscated URLs
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ClientType {
@ -95,7 +110,7 @@ impl Default for RequestYT {
#[derive(Clone, Debug, Serialize, Default)]
#[serde(rename_all = "camelCase")]
struct User {
// TO DO: provide a way to enable restricted mode with:
// TODO: provide a way to enable restricted mode with:
// "enableSafetyMode": true
locked_safety_mode: bool,
}
@ -131,6 +146,11 @@ const IOS_DEVICE_MODEL: &str = "iPhone14,5";
/// Regular expressions used to find the client version in downloaded
/// YouTube resources (`sw.js` or an HTML page).
///
/// Each pattern matches `INNERTUBE_CONTEXT_CLIENT_VERSION":"<version>"`, where
/// the version consists of digits and dots and is captured in group 1.
static CLIENT_VERSION_REGEXES: Lazy<[Regex; 1]> =
Lazy::new(|| [Regex::new("INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"").unwrap()]);
/// The RustyPipe client used to access YouTube's API
///
/// RustyPipe includes an `Arc` internally, so if you are using the client
/// at multiple locations, you can just clone it. Note that options (lang/country/report)
/// are not shared between clones.
#[derive(Clone)]
pub struct RustyPipe {
inner: Arc<RustyPipeRef>,
@ -139,10 +159,11 @@ pub struct RustyPipe {
struct RustyPipeRef {
http: Client,
cache: Cache,
storage: Option<Box<dyn CacheStorage>>,
reporter: Option<Box<dyn Reporter>>,
user_agent: String,
consent_cookie: String,
cache: Mutex<CacheData>,
}
#[derive(Clone)]
@ -150,13 +171,14 @@ struct RustyPipeOpts {
lang: Language,
country: Country,
report: bool,
strict: bool,
}
impl Default for RustyPipe {
fn default() -> Self {
Self::new(
Some(Cache::from_json_file("RustyPipeCache.json")),
Some(Box::new(YamlFileReporter::default())),
Some(Box::new(FileStorage::default())),
Some(Box::new(JsonFileReporter::default())),
None,
)
}
@ -168,17 +190,64 @@ impl Default for RustyPipeOpts {
lang: Language::En,
country: Country::Us,
report: false,
strict: false,
}
}
}
/// Contents of the RustyPipe cache.
///
/// The whole structure is (de)serialized as JSON when it is read from or
/// written to the configured cache storage.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct CacheData {
/// Cached client data (version) of the desktop client
desktop_client: CacheEntry<ClientData>,
/// Cached client data (version) of the music client
music_client: CacheEntry<ClientData>,
/// Cached deobfuscation data
deobf: CacheEntry<DeobfData>,
}
/// A single cache slot which is either empty or holds a value together
/// with the time it was stored.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
enum CacheEntry<T> {
/// Nothing has been cached (this is the default state)
#[default]
None,
/// A cached value
Some {
/// Point in time (UTC) at which `data` was stored
last_update: DateTime<Utc>,
/// The cached value itself
data: T,
},
}
/// Cached information about a YouTube client.
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ClientData {
/// Client version string, sent to YouTube as the client version
pub version: String,
}
impl<T> CacheEntry<T> {
    /// Return the cached value if one is present and still fresh.
    ///
    /// An entry counts as fresh when it was stored no more than 24 hours ago.
    /// Empty and expired entries both yield `None`.
    fn get(&self) -> Option<&T> {
        if let CacheEntry::Some { last_update, data } = self {
            // Everything stored before this instant is considered expired.
            let oldest_valid = Utc::now() - Duration::hours(24);
            if *last_update >= oldest_valid {
                return Some(data);
            }
        }
        None
    }
}
impl<T> From<T> for CacheEntry<T> {
/// Wrap a value into a filled cache entry, stamping it with the
/// current UTC time as its `last_update`.
fn from(f: T) -> Self {
Self::Some {
last_update: Utc::now(),
data: f,
}
}
}
impl RustyPipe {
/// Create a new RustyPipe instance
pub fn new(
cache: Option<Cache>,
storage: Option<Box<dyn CacheStorage>>,
reporter: Option<Box<dyn Reporter>>,
user_agent: Option<String>,
) -> Self {
let cache = cache.unwrap_or_else(|| Cache::default());
let user_agent = user_agent.unwrap_or(DEFAULT_UA.to_owned());
let http = ClientBuilder::new()
@ -188,10 +257,26 @@ impl RustyPipe {
.build()
.expect("unable to build the HTTP client");
let cache = if let Some(storage) = &storage {
if let Some(data) = storage.read() {
match serde_json::from_str::<CacheData>(&data) {
Ok(data) => data,
Err(e) => {
error!("Could not deserialize cache. Error: {}", e);
CacheData::default()
}
}
} else {
CacheData::default()
}
} else {
CacheData::default()
};
RustyPipe {
inner: Arc::new(RustyPipeRef {
http,
cache,
storage,
reporter,
user_agent,
consent_cookie: format!(
@ -200,26 +285,53 @@ impl RustyPipe {
CONSENT_COOKIE_YES,
rand::thread_rng().gen_range(100..1000)
),
cache: Mutex::new(cache),
}),
opts: RustyPipeOpts::default(),
}
}
/// Create a new RustyPipe instance configured for testing
///
/// The instance uses the default [`FileStorage`] as cache storage, a default
/// YAML file reporter, no custom user agent and has strict mode enabled.
///
/// Only compiled for test builds with the `yaml` feature enabled.
#[cfg(test)]
#[cfg(feature = "yaml")]
pub fn new_test() -> Self {
Self::new(
Some(Box::new(FileStorage::default())),
Some(Box::new(crate::report::YamlFileReporter::default())),
None,
)
.strict(true)
}
/// Set the language parameter used when accessing the YouTube API.
///
/// This will change multilanguage video titles, descriptions and textual dates.
///
/// Consumes the client and returns it with the option applied, so calls
/// can be chained.
pub fn lang(mut self, lang: Language) -> Self {
self.opts.lang = lang;
self
}
/// Set the country parameter used when accessing the YouTube API.
///
/// This will change trends and recommended content.
///
/// Consumes the client and returns it with the option applied, so calls
/// can be chained.
pub fn country(mut self, country: Country) -> Self {
self.opts.country = country;
self
}
/// Generate a report on every operation.
///
/// This should only be used for debugging.
///
/// Consumes the client and returns it with the option applied, so calls
/// can be chained.
pub fn report(mut self, report: bool) -> Self {
self.opts.report = report;
self
}
/// Enable strict mode, causing operations to fail if there
/// are warnings during deserialization (e.g. invalid items).
///
/// This should only be used for testing.
///
/// Consumes the client and returns it with the option applied, so calls
/// can be chained.
pub fn strict(mut self, strict: bool) -> Self {
self.opts.strict = strict;
self
}
async fn get_context(&self, ctype: ClientType, localized: bool) -> ContextYT {
let hl = match localized {
true => self.opts.lang,
@ -234,7 +346,7 @@ impl RustyPipe {
ClientType::Desktop => ContextYT {
client: ClientInfo {
client_name: "WEB".to_owned(),
client_version: DESKTOP_CLIENT_VERSION.to_owned(),
client_version: self.get_desktop_client_version().await,
client_screen: None,
device_model: None,
platform: "DESKTOP".to_owned(),
@ -249,7 +361,7 @@ impl RustyPipe {
ClientType::DesktopMusic => ContextYT {
client: ClientInfo {
client_name: "WEB_REMIX".to_owned(),
client_version: DESKTOP_MUSIC_CLIENT_VERSION.to_owned(),
client_version: self.get_music_client_version().await,
client_screen: None,
device_model: None,
platform: "DESKTOP".to_owned(),
@ -332,7 +444,7 @@ impl RustyPipe {
.header(header::REFERER, "https://www.youtube.com")
.header(header::COOKIE, self.inner.consent_cookie.to_owned())
.header("X-YouTube-Client-Name", "1")
.header("X-YouTube-Client-Version", DESKTOP_CLIENT_VERSION),
.header("X-YouTube-Client-Version", self.get_desktop_client_version().await),
ClientType::DesktopMusic => self
.inner
.http
@ -350,7 +462,7 @@ impl RustyPipe {
.header(header::REFERER, "https://music.youtube.com")
.header(header::COOKIE, self.inner.consent_cookie.to_owned())
.header("X-YouTube-Client-Name", "67")
.header("X-YouTube-Client-Version", DESKTOP_MUSIC_CLIENT_VERSION),
.header("X-YouTube-Client-Version", self.get_music_client_version().await),
ClientType::TvHtml5Embed => self
.inner
.http
@ -410,7 +522,7 @@ impl RustyPipe {
}
}
async fn execute_request<
async fn execute_request_deobf<
R: DeserializeOwned + MapResponse<M> + Debug,
M,
B: Serialize + ?Sized,
@ -448,6 +560,7 @@ impl RustyPipe {
operation: operation.to_owned(),
error,
msgs,
deobf_data: deobf.map(Deobfuscator::get_data),
http_request: crate::report::HTTPRequest {
url: request_url,
method: method.to_string(),
@ -482,6 +595,10 @@ impl RustyPipe {
Some("Warnings during deserialization/mapping".to_owned()),
mapres.warnings,
);
if self.opts.strict {
bail!("Warnings during deserialization/mapping");
}
} else if self.opts.report {
create_report(Level::DBG, None, vec![]);
}
@ -500,6 +617,176 @@ impl RustyPipe {
}
}
}
/// Execute an API request that does not need deobfuscation data.
///
/// Convenience wrapper which forwards all arguments unchanged to
/// `execute_request_deobf` and passes `None` as its final argument
/// (presumably the optional deobfuscator -- confirm against that
/// function's signature).
///
/// Returns whatever `execute_request_deobf` returns for the given
/// response type `R`, mapped type `M` and request body type `B`.
async fn execute_request<
R: DeserializeOwned + MapResponse<M> + Debug,
M,
B: Serialize + ?Sized,
>(
&self,
ctype: ClientType,
operation: &str,
method: Method,
endpoint: &str,
id: &str,
body: &B,
) -> Result<M> {
self.execute_request_deobf::<R, M, B>(ctype, operation, method, endpoint, id, body, None)
.await
}
/// Get the version of the desktop client.
///
/// A fresh cache entry is returned directly. Otherwise the version is
/// extracted from YouTube, stored in the cache and written to the cache
/// storage. If the extraction fails, a warning is logged and the hardcoded
/// `DESKTOP_CLIENT_VERSION` is returned (without being cached).
async fn get_desktop_client_version(&self) -> String {
    // The cache lock is held for the whole lookup, including the extraction.
    let mut guard = self.inner.cache.lock().await;

    if let Some(cached) = guard.desktop_client.get() {
        return cached.version.to_owned();
    }

    match self.extract_desktop_client_version().await {
        Err(e) => {
            warn!("{}, falling back to hardcoded version", e);
            DESKTOP_CLIENT_VERSION.to_owned()
        }
        Ok(version) => {
            let entry = CacheEntry::from(ClientData {
                version: version.to_owned(),
            });
            guard.desktop_client = entry;
            self.write_cache(&guard);
            version
        }
    }
}
/// Get the version of the YouTube Music desktop client.
///
/// A fresh cache entry is returned directly. Otherwise the version is
/// extracted from YouTube Music, stored in the cache and written to the
/// cache storage. If the extraction fails, a warning is logged and the
/// hardcoded `DESKTOP_MUSIC_CLIENT_VERSION` is returned (without being cached).
async fn get_music_client_version(&self) -> String {
    // The cache lock is held for the whole lookup, including the extraction.
    let mut guard = self.inner.cache.lock().await;

    if let Some(cached) = guard.music_client.get() {
        return cached.version.to_owned();
    }

    match self.extract_music_client_version().await {
        Err(e) => {
            warn!("{}, falling back to hardcoded version", e);
            DESKTOP_MUSIC_CLIENT_VERSION.to_owned()
        }
        Ok(version) => {
            let entry = CacheEntry::from(ClientData {
                version: version.to_owned(),
            });
            guard.music_client = entry;
            self.write_cache(&guard);
            version
        }
    }
}
/// Create a deobfuscator and store its data in the cache.
///
/// Returns an error if `Deobfuscator::new` fails; in that case the cache
/// is left untouched.
///
/// NOTE(review): the cached `deobf` entry is only ever written here, never
/// read -- a new `Deobfuscator` is built on every call, so the cache does
/// not save any work for deobfuscation data yet.
async fn get_deobf(&self) -> Result<Deobfuscator> {
// The cache lock is acquired before the deobfuscator is created and is
// held until the function returns.
let mut cache = self.inner.cache.lock().await;
let deobf = Deobfuscator::new(self.inner.http.clone()).await?;
cache.deobf = CacheEntry::from(deobf.get_data());
self.write_cache(&cache);
Ok(deobf)
}
async fn extract_desktop_client_version(&self) -> Result<String> {
let from_swjs = async {
let swjs = self
.exec_request_text(
self.inner
.http
.get("https://www.youtube.com/sw.js")
.header(header::ORIGIN, "https://www.youtube.com")
.header(header::REFERER, "https://www.youtube.com")
.header(header::COOKIE, self.inner.consent_cookie.to_owned())
.build()
.unwrap(),
)
.await
.context("Failed to download sw.js")?;
util::get_cg_from_regexes(CLIENT_VERSION_REGEXES.iter(), &swjs, 1)
.ok_or(anyhow!("Could not find desktop client version in sw.js"))
};
let from_html = async {
let html = self
.exec_request_text(
self.inner
.http
.get("https://www.youtube.com/results?search_query=")
.build()
.unwrap(),
)
.await
.context("Failed to get YT Desktop page")?;
util::get_cg_from_regexes(CLIENT_VERSION_REGEXES.iter(), &html, 1).ok_or(anyhow!(
"Could not find desktop client version on html page"
))
};
match from_swjs.await {
Ok(client_version) => Ok(client_version),
Err(_) => from_html.await,
}
}
async fn extract_music_client_version(&self) -> Result<String> {
let from_swjs = async {
let swjs = self
.exec_request_text(
self.inner
.http
.get("https://music.youtube.com/sw.js")
.header(header::ORIGIN, "https://music.youtube.com")
.header(header::REFERER, "https://music.youtube.com")
.header(header::COOKIE, self.inner.consent_cookie.to_owned())
.build()
.unwrap(),
)
.await
.context("Failed to download sw.js")?;
util::get_cg_from_regexes(CLIENT_VERSION_REGEXES.iter(), &swjs, 1)
.ok_or(anyhow!("Could not find desktop client version in sw.js"))
};
let from_html = async {
let html = self
.exec_request_text(
self.inner
.http
.get("https://music.youtube.com")
.build()
.unwrap(),
)
.await
.context("Failed to get YT Desktop page")?;
util::get_cg_from_regexes(CLIENT_VERSION_REGEXES.iter(), &html, 1).ok_or(anyhow!(
"Could not find desktop client version on html page"
))
};
match from_swjs.await {
Ok(client_version) => Ok(client_version),
Err(_) => from_html.await,
}
}
/// Execute the given HTTP request with the shared HTTP client.
///
/// # Errors
///
/// Fails if the request cannot be executed or if the response carries
/// an error status code.
async fn exec_request(&self, request: Request) -> Result<Response> {
    let response = self.inner.http.execute(request).await?;
    let checked = response.error_for_status()?;
    Ok(checked)
}
/// Execute the given HTTP request and return the response body as text.
///
/// # Errors
///
/// Fails if `exec_request` fails or if the body cannot be read as text.
async fn exec_request_text(&self, request: Request) -> Result<String> {
    let response = self.exec_request(request).await?;
    let body = response.text().await?;
    Ok(body)
}
/// Serialize the cache to JSON and hand it to the cache storage.
///
/// Does nothing if no storage is configured. Serialization failures are
/// logged and otherwise ignored.
fn write_cache(&self, cache: &CacheData) {
    let storage = match &self.inner.storage {
        Some(storage) => storage,
        None => return,
    };

    match serde_json::to_string(cache) {
        Err(e) => error!("Could not serialize cache. Error: {}", e),
        Ok(json) => storage.write(&json),
    }
}
}
trait MapResponse<T> {
@ -525,10 +812,3 @@ where
self.c.fmt(f)
}
}
/*
#[cfg(test)]
mod tests {
use super::*;
}
*/

View file

@ -59,10 +59,10 @@ struct QContentPlaybackContext {
impl RustyPipe {
pub async fn get_player(&self, video_id: &str, client_type: ClientType) -> Result<VideoPlayer> {
let (context, deobf) = tokio::join!(
self.get_context(client_type, false),
Deobfuscator::from_fetched_info(self.inner.http.clone(), self.inner.cache.clone())
);
let (context, deobf) = tokio::join!(self.get_context(client_type, false), self.get_deobf());
// let context = self.get_context(client_type, false).await;
// let deobf = self.get_deobf().await;
let deobf = deobf?;
let request_body = if client_type.is_web() {
@ -90,7 +90,7 @@ impl RustyPipe {
}
};
self.execute_request::<response::Player, _, _>(
self.execute_request_deobf::<response::Player, _, _>(
client_type,
"get_player",
Method::POST,
@ -575,10 +575,11 @@ fn get_audio_codec(codecs: Vec<&str>) -> AudioCodec {
}
#[cfg(test)]
#[cfg(feature = "yaml")]
mod tests {
use std::{fs::File, io::BufReader, path::Path};
use crate::{cache::DeobfData, client2::CLIENT_TYPES, report::TestFileReporter};
use crate::{deobfuscate::DeobfData, client2::CLIENT_TYPES, report::TestFileReporter};
use super::*;
use rstest::rstest;
@ -613,7 +614,7 @@ mod tests {
#[test_log::test(tokio::test)]
async fn download_model_testfiles() {
let tf_dir = Path::new("testfiles/player_model");
let rp = RustyPipe::default();
let rp = RustyPipe::new_test();
for (name, id) in [("multilanguage", "tVWWp1PqDus"), ("hdr", "LXb3EKWsInQ")] {
let mut json_path = tf_dir.to_path_buf();
@ -683,7 +684,7 @@ mod tests {
#[case::ios(ClientType::Ios)]
#[test_log::test(tokio::test)]
async fn t_get_player(#[case] client_type: ClientType) {
let rp = RustyPipe::default();
let rp = RustyPipe::new_test();
let player_data = rp.get_player("n4tK7LYFxI0", client_type).await.unwrap();
// dbg!(&player_data);

View file

@ -40,7 +40,6 @@ impl RustyPipe {
"browse",
playlist_id,
&request_body,
None,
)
.await
}
@ -62,7 +61,6 @@ impl RustyPipe {
"browse",
&playlist.id,
&request_body,
None,
)
.await?;
@ -350,7 +348,7 @@ mod tests {
#[case] description: Option<String>,
#[case] channel: Option<Channel>,
) {
let rp = RustyPipe::default();
let rp = RustyPipe::new_test();
let playlist = rp.get_playlist(id).await.unwrap();
assert_eq!(playlist.id, id);
@ -412,7 +410,7 @@ mod tests {
#[test_log::test(tokio::test)]
async fn t_playlist_cont() {
let rp = RustyPipe::default();
let rp = RustyPipe::new_test();
let mut playlist = rp
.get_playlist("PLbZIPy20-1pN7mqjckepWF78ndb6ci_qi")
.await