feat: add error reporting for deobf data extraction

This commit is contained in:
ThetaDev 2023-08-03 17:42:31 +02:00
parent ca2335d03f
commit 5736d53c99
4 changed files with 90 additions and 26 deletions

View file

@ -1,5 +1,3 @@
use std::collections::BTreeMap;
use crate::{
error::{Error, ExtractionError},
model::ChannelRss,
@ -46,9 +44,9 @@ impl RustyPipeQuery {
http_request: crate::report::HTTPRequest {
url: &url,
method: "GET",
req_header: BTreeMap::new(),
req_body: String::new(),
status: 200,
req_header: None,
req_body: None,
resp_body: xml,
},
};

View file

@ -399,6 +399,7 @@ struct RequestResult<T> {
}
impl<T> CacheEntry<T> {
/// Get the content of the cache if it is still fresh
fn get(&self) -> Option<&T> {
match self {
CacheEntry::Some { last_update, data } => {
@ -411,6 +412,14 @@ impl<T> CacheEntry<T> {
CacheEntry::None => None,
}
}
/// Return the cached value regardless of its age
///
/// In contrast to `get`, no freshness check is performed here, so stale data
/// is handed out as well. The result is `None` only for an empty cache entry.
fn get_expired(&self) -> Option<&T> {
    if let CacheEntry::Some { data, .. } = self {
        Some(data)
    } else {
        None
    }
}
}
impl<T> From<T> for CacheEntry<T> {
@ -901,7 +910,7 @@ impl RustyPipe {
}
}
/// Instantiate a new deobfuscator from either cached or extracted YouTube JavaScript code.
/// Get deobfuscation data (either from cache or extracted from YouTube's JavaScript code)
async fn get_deobf_data(&self) -> Result<DeobfData, Error> {
// Write lock here to prevent concurrent tasks from fetching the same data
let mut deobf_data = self.inner.cache.deobf.write().await;
@ -909,12 +918,29 @@ impl RustyPipe {
match deobf_data.get() {
Some(deobf_data) => Ok(deobf_data.clone()),
None => {
log::debug!("getting deobfuscator");
let new_data = DeobfData::download(self.inner.http.clone()).await?;
*deobf_data = CacheEntry::from(new_data.clone());
drop(deobf_data);
self.store_cache().await;
Ok(new_data)
log::debug!("getting deobf data");
match DeobfData::extract(self.inner.http.clone(), self.inner.reporter.as_deref())
.await
{
Ok(new_data) => {
// Write new data to the cache
*deobf_data = CacheEntry::from(new_data.clone());
drop(deobf_data);
self.store_cache().await;
Ok(new_data)
}
Err(e) => {
// Try to fall back to expired cache data if available, otherwise return error
match deobf_data.get_expired() {
Some(d) => {
log::warn!("could not get new deobf data ({e}), falling back to expired cache");
Ok(d.clone())
}
None => Err(e),
}
}
}
}
}
}
@ -1370,12 +1396,16 @@ impl RustyPipeQuery {
http_request: crate::report::HTTPRequest {
url: request.url().as_str(),
method: request.method().as_str(),
req_header: request
.headers()
.iter()
.map(|(k, v)| (k.as_str(), v.to_str().unwrap_or_default().to_owned()))
.collect(),
req_body: serde_json::to_string(body).unwrap_or_default(),
req_header: Some(
request
.headers()
.iter()
.map(|(k, v)| {
(k.as_str(), v.to_str().unwrap_or_default().to_owned())
})
.collect(),
),
req_body: serde_json::to_string(body).ok(),
status: req_res.status.into(),
resp_body: req_res.body,
},

View file

@ -6,6 +6,7 @@ use serde::{Deserialize, Serialize};
use crate::{
error::{internal::DeobfError, Error},
report::{Level, Report, Reporter, RustyPipeInfo},
util,
};
@ -22,18 +23,47 @@ pub struct DeobfData {
}
impl DeobfData {
pub async fn download(http: Client) -> Result<Self, Error> {
/// Download and extract the latest deobfuscation data from YouTube
///
/// Creates a report if the data could not be extracted
pub async fn extract(http: Client, reporter: Option<&dyn Reporter>) -> Result<Self, Error> {
let js_url = get_player_js_url(&http).await?;
let player_js = get_response(&http, &js_url).await?;
log::debug!("downloaded player.js from {}", js_url);
let sig_fn = get_sig_fn(&player_js)?;
let nsig_fn = get_nsig_fn(&player_js)?;
let sts = get_sts(&player_js)?;
let res = Self::extract_fns(&js_url, &player_js);
if let Err(e) = &res {
if let Some(reporter) = reporter {
let report = Report {
info: RustyPipeInfo::default(),
level: Level::ERR,
operation: "extract_deobf",
error: Some(e.to_string()),
msgs: vec![],
deobf_data: None,
http_request: crate::report::HTTPRequest {
url: &js_url,
method: "GET",
req_header: None,
req_body: None,
status: 200,
resp_body: player_js,
},
};
reporter.report(&report);
}
}
res
}
fn extract_fns(js_url: &str, player_js: &str) -> Result<Self, Error> {
let sig_fn = get_sig_fn(player_js)?;
let nsig_fn = get_nsig_fn(player_js)?;
let sts = get_sts(player_js)?;
Ok(Self {
js_url,
js_url: js_url.to_owned(),
sig_fn,
nsig_fn,
sts,
@ -42,6 +72,7 @@ impl DeobfData {
}
impl Deobfuscator {
/// Instantiate a new deobfuscator with the given data
pub fn new(data: &DeobfData) -> Result<Self, DeobfError> {
let ctx =
quick_js::Context::new().or(Err(DeobfError::Other("could not create QuickJS rt")))?;
@ -51,6 +82,7 @@ impl Deobfuscator {
Ok(Self { ctx })
}
/// Deobfuscate the `s` parameter from the `signature_cipher` field
pub fn deobfuscate_sig(&self, sig: &str) -> Result<String, DeobfError> {
let res = self.ctx.call_function(DEOBF_SIG_FUNC_NAME, vec![sig])?;
@ -63,6 +95,7 @@ impl Deobfuscator {
)
}
/// Deobfuscate the `n` stream URL parameter to circumvent throttling
pub fn deobfuscate_nsig(&self, nsig: &str) -> Result<String, DeobfError> {
let res = self.ctx.call_function(DEOBF_NSIG_FUNC_NAME, vec![nsig])?;
@ -403,7 +436,7 @@ c[36](c[8],c[32]),c[20](c[25],c[10]),c[2](c[22],c[8]),c[32](c[20],c[16]),c[32](c
#[test]
fn t_update() {
let client = Client::new();
let deobf_data = tokio_test::block_on(DeobfData::download(client)).unwrap();
let deobf_data = tokio_test::block_on(DeobfData::extract(client, None)).unwrap();
let deobf = Deobfuscator::new(&deobf_data).unwrap();
let deobf_sig = deobf.deobfuscate_sig("GOqGOqGOq0QJ8wRAIgaryQHfplJ9xJSKFywyaSMHuuwZYsoMTAvRvfm51qIGECIA5061zWeyfMPX9hEl_U6f9J0tr7GTJMKyPf5XNrJb5fb5i").unwrap();

View file

@ -47,6 +47,7 @@ pub struct Report<'a> {
/// Error (if occurred)
pub error: Option<String>,
/// Detailed error/warning messages
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub msgs: Vec<String>,
/// Deobfuscation data (only for player requests)
#[serde(skip_serializing_if = "Option::is_none")]
@ -77,9 +78,11 @@ pub struct HTTPRequest<'a> {
/// HTTP method
pub method: &'a str,
/// HTTP request header
pub req_header: BTreeMap<&'a str, String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub req_header: Option<BTreeMap<&'a str, String>>,
/// HTTP request body
pub req_body: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub req_body: Option<String>,
/// HTTP response status code
pub status: u16,
/// HTTP response body