feat: add support for rustypipe-botguard to get PO tokens
This commit is contained in:
parent
92340056f8
commit
b90a252a5e
16 changed files with 313 additions and 100 deletions
|
|
@ -25,6 +25,7 @@ mod video_details;
|
|||
mod channel_rss;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::{borrow::Cow, fmt::Debug, time::Duration};
|
||||
|
|
@ -97,6 +98,13 @@ impl ClientType {
|
|||
fn needs_deobf(self) -> bool {
|
||||
!matches!(self, ClientType::Ios)
|
||||
}
|
||||
|
||||
fn needs_po_token(self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ClientType::Desktop | ClientType::DesktopMusic | ClientType::Mobile
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// YouTube context request parameter
|
||||
|
|
@ -317,7 +325,7 @@ pub(crate) const DEFAULT_UA: &str = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit
|
|||
pub(crate) const MOBILE_UA: &str = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.135 Mobile Safari/537.36";
|
||||
pub(crate) const TV_UA: &str = "Mozilla/5.0 (SMART-TV; Linux; Tizen 5.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/5.0 NativeTVAds Safari/538.1";
|
||||
|
||||
const CONSENT_COOKIE: &str = "SOCS=CAISAiAD";
|
||||
pub(crate) const CONSENT_COOKIE: &str = "SOCS=CAISAiAD";
|
||||
|
||||
const YOUTUBEI_V1_URL: &str = "https://www.youtube.com/youtubei/v1/";
|
||||
const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1/";
|
||||
|
|
@ -352,13 +360,6 @@ const OAUTH_SCOPES: &str = "http://gdata.youtube.com https://www.googleapis.com/
|
|||
static CLIENT_VERSION_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r#""INNERTUBE_CONTEXT_CLIENT_VERSION":"([\w\d\._-]+?)""#).unwrap());
|
||||
|
||||
/// Default order of client types when fetching player data
|
||||
///
|
||||
/// The order may change in the future in case YouTube applies changes to their
|
||||
/// platform that disable a client or make it less reliable.
|
||||
pub const DEFAULT_PLAYER_CLIENT_ORDER: &[ClientType] =
|
||||
&[ClientType::Ios, ClientType::Tv, ClientType::Android];
|
||||
|
||||
/// The RustyPipe client used to access YouTube's API
|
||||
///
|
||||
/// RustyPipe uses an [`Arc`] internally, so if you are using the client
|
||||
|
|
@ -378,6 +379,7 @@ struct RustyPipeRef {
|
|||
default_opts: RustyPipeOpts,
|
||||
user_agent: Cow<'static, str>,
|
||||
visitor_data_cache: VisitorDataCache,
|
||||
botguard: Option<BotguardCfg>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
|
@ -399,6 +401,12 @@ pub struct RustyPipeBuilder {
|
|||
user_agent: Option<String>,
|
||||
default_opts: RustyPipeOpts,
|
||||
storage_dir: Option<PathBuf>,
|
||||
botguard_bin: DefaultOpt<OsString>,
|
||||
}
|
||||
|
||||
/// Settings for invoking the external Botguard program that generates PO tokens
struct BotguardCfg {
|
||||
/// Executable that is spawned to generate PO tokens
program: OsString,
|
||||
/// Path handed to the program through its `--snapshot-file` argument
snapshot_file: PathBuf,
|
||||
}
|
||||
|
||||
enum DefaultOpt<T> {
|
||||
|
|
@ -415,6 +423,13 @@ impl<T> DefaultOpt<T> {
|
|||
DefaultOpt::Default => Some(f()),
|
||||
}
|
||||
}
|
||||
fn or_default_opt<F: FnOnce() -> Option<T>>(self, f: F) -> Option<T> {
|
||||
match self {
|
||||
DefaultOpt::Some(x) => Some(x),
|
||||
DefaultOpt::None => None,
|
||||
DefaultOpt::Default => f(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// # RustyPipe query
|
||||
|
|
@ -477,7 +492,7 @@ impl<T> DefaultOpt<T> {
|
|||
///
|
||||
/// ## Options
|
||||
///
|
||||
/// You can set the language, country and visitor data cookie for individual requests.
|
||||
/// You can set the language, country and visitor data ID for individual requests.
|
||||
///
|
||||
/// ```
|
||||
/// # use rustypipe::client::RustyPipe;
|
||||
|
|
@ -626,6 +641,7 @@ impl RustyPipeBuilder {
|
|||
n_http_retries: 2,
|
||||
user_agent: None,
|
||||
storage_dir: None,
|
||||
botguard_bin: DefaultOpt::Default,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -690,12 +706,25 @@ impl RustyPipeBuilder {
|
|||
|
||||
let visitor_data_cache = VisitorDataCache::new(http.clone());
|
||||
|
||||
let botguard_bin = self.botguard_bin.or_default_opt(|| {
|
||||
let n = OsString::from("rustypipe-botguard");
|
||||
let out = std::process::Command::new(&n)
|
||||
.arg("--version")
|
||||
.output()
|
||||
.ok()?;
|
||||
if out.status.success() {
|
||||
Some(n)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
Ok(RustyPipe {
|
||||
inner: Arc::new(RustyPipeRef {
|
||||
http,
|
||||
storage,
|
||||
reporter: self.reporter.or_default(|| {
|
||||
let mut report_dir = storage_dir;
|
||||
let mut report_dir = storage_dir.clone();
|
||||
report_dir.push(DEFAULT_REPORT_DIR);
|
||||
Box::new(FileReporter::new(report_dir))
|
||||
}),
|
||||
|
|
@ -709,6 +738,14 @@ impl RustyPipeBuilder {
|
|||
default_opts: self.default_opts,
|
||||
user_agent,
|
||||
visitor_data_cache,
|
||||
botguard: botguard_bin.map(|program| {
|
||||
let mut snapshot_file = storage_dir;
|
||||
snapshot_file.push("bg_snapshot.bin");
|
||||
BotguardCfg {
|
||||
program,
|
||||
snapshot_file,
|
||||
}
|
||||
}),
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
|
@ -868,14 +905,14 @@ impl RustyPipeBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
/// Set the YouTube visitor data cookie
|
||||
/// Set the YouTube visitor data ID
|
||||
///
|
||||
/// YouTube assigns a session cookie to each user which is used for personalized
|
||||
/// recommendations. By default, RustyPipe does not send this cookie to preserve
|
||||
/// user privacy. For requests that mandate the cookie, a new one is requested
|
||||
/// for every query.
|
||||
///
|
||||
/// This option allows you to manually set the visitor data cookie of your client,
|
||||
/// This option allows you to manually set the visitor data ID of your client,
|
||||
/// allowing you to get personalized recommendations or reproduce A/B tests.
|
||||
///
|
||||
/// Note that YouTube has a rate limit on the number of requests from a single
|
||||
|
|
@ -888,7 +925,7 @@ impl RustyPipeBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
/// Set the YouTube visitor data cookie to an optional value
|
||||
/// Set the YouTube visitor data ID to an optional value
|
||||
///
|
||||
/// see also [`RustyPipeBuilder::visitor_data`]
|
||||
///
|
||||
|
|
@ -898,6 +935,26 @@ impl RustyPipeBuilder {
|
|||
self.default_opts.visitor_data = visitor_data.map(S::into);
|
||||
self
|
||||
}
|
||||
|
||||
/// Disable RustyPipe Botguard
|
||||
///
|
||||
/// By default, RustyPipe uses the `rustypipe-botguard` binary if it is available. If you want to
|
||||
/// use RustyPipe without Botguard, you can disable it.
|
||||
pub fn no_botguard(mut self) -> Self {
|
||||
self.botguard_bin = DefaultOpt::None;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable RustyPipe Botguard using the given binary
|
||||
///
|
||||
/// Botguard is required to generate PO tokens for accessing streams on browser-based clients.
|
||||
/// By default, RustyPipe uses the `rustypipe-botguard` binary if it is available.
|
||||
///
|
||||
/// More information: <https://codeberg.org/ThetaDev/rustypipe-botguard>
|
||||
pub fn botguard_bin<S: Into<OsString>>(mut self, botguard_bin: S) -> Self {
|
||||
self.botguard_bin = DefaultOpt::Some(botguard_bin.into());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RustyPipe {
|
||||
|
|
@ -1191,17 +1248,6 @@ impl RustyPipe {
|
|||
}
|
||||
}
|
||||
|
||||
/// Request a new visitor data cookie from YouTube
|
||||
///
|
||||
/// Since the cookie is shared between YT and YTM and the YTM page loads faster,
|
||||
/// we request that.
|
||||
///
|
||||
/// Sometimes YouTube does not set the `__Secure-YEC` cookie. In this case, the
|
||||
/// visitor data is extracted from the html page.
|
||||
async fn get_visitor_data(&self) -> Result<String, Error> {
|
||||
self.inner.visitor_data_cache.new_visitor_data().await
|
||||
}
|
||||
|
||||
/// Get a new device code for logging into YouTube
|
||||
pub async fn user_auth_get_code(&self) -> Result<OauthDeviceCode, Error> {
|
||||
tracing::debug!("getting OAuth user code");
|
||||
|
|
@ -1618,14 +1664,14 @@ impl RustyPipeQuery {
|
|||
self
|
||||
}
|
||||
|
||||
/// Set the YouTube visitor data cookie
|
||||
/// Set the YouTube visitor data ID
|
||||
///
|
||||
/// YouTube assigns a session cookie to each user which is used for personalized
|
||||
/// recommendations. By default, RustyPipe does not send this cookie to preserve
|
||||
/// user privacy. For requests that mandate the cookie, a new one is requested
|
||||
/// for every query.
|
||||
///
|
||||
/// This option allows you to manually set the visitor data cookie of your query,
|
||||
/// This option allows you to manually set the visitor data ID of your query,
|
||||
/// allowing you to get personalized recommendations or reproduce A/B tests.
|
||||
///
|
||||
/// Note that YouTube has a rate limit on the number of requests from a single
|
||||
|
|
@ -1636,7 +1682,7 @@ impl RustyPipeQuery {
|
|||
self
|
||||
}
|
||||
|
||||
/// Set the YouTube visitor data cookie to an optional value
|
||||
/// Set the YouTube visitor data ID to an optional value
|
||||
///
|
||||
/// see also [`RustyPipeQuery::visitor_data`]
|
||||
#[must_use]
|
||||
|
|
@ -1845,7 +1891,7 @@ impl RustyPipeQuery {
|
|||
/// - `ctype`: Client type (`Desktop`, `DesktopMusic`, `Android`, ...)
|
||||
/// - `method`: HTTP method
|
||||
/// - `endpoint`: YouTube API endpoint (`https://www.youtube.com/youtubei/v1/<XYZ>?key=...`)
|
||||
/// - `visitor_data`: YouTube visitor data cookie
|
||||
/// - `visitor_data`: YouTube visitor data ID
|
||||
async fn request_builder(
|
||||
&self,
|
||||
ctype: ClientType,
|
||||
|
|
@ -1987,14 +2033,75 @@ impl RustyPipeQuery {
|
|||
Some(format!("SAPISIDHASH {time_now}_{sapisidhash_hex}"))
|
||||
}
|
||||
|
||||
/// Get a YouTube visitor data cookie, which is necessary for certain requests
|
||||
pub async fn get_visitor_data(&self) -> Result<String, Error> {
|
||||
/// Get a YouTube visitor data ID, which is necessary for certain requests
|
||||
pub async fn get_visitor_data(&self, force_new: bool) -> Result<String, Error> {
|
||||
if force_new {
|
||||
return self
|
||||
.client
|
||||
.inner
|
||||
.visitor_data_cache
|
||||
.new_visitor_data()
|
||||
.await;
|
||||
}
|
||||
|
||||
match &self.opts.visitor_data {
|
||||
Some(vd) => Ok(vd.clone()),
|
||||
None => self.client.get_visitor_data().await,
|
||||
None => self.client.inner.visitor_data_cache.get().await,
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove a YouTube visitor data ID from the cache so it is not used again
|
||||
pub fn remove_visitor_data(&self, visitor_data: &str) {
|
||||
self.client.inner.visitor_data_cache.remove(visitor_data);
|
||||
}
|
||||
|
||||
/// Get PO tokens
|
||||
async fn get_po_tokens(&self, idents: &[&str]) -> Result<Vec<String>, Error> {
|
||||
let bg = self
|
||||
.client
|
||||
.inner
|
||||
.botguard
|
||||
.as_ref()
|
||||
.ok_or(Error::Extraction(ExtractionError::Botguard(
|
||||
"not enabled".into(),
|
||||
)))?;
|
||||
let cmd = tokio::process::Command::new(&bg.program)
|
||||
.arg("--snapshot-file")
|
||||
.arg(&bg.snapshot_file)
|
||||
.arg("--")
|
||||
.args(idents)
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?;
|
||||
if !cmd.status.success() {
|
||||
return Err(Error::Extraction(ExtractionError::Botguard(
|
||||
String::from_utf8_lossy(&cmd.stderr).into_owned().into(),
|
||||
)));
|
||||
}
|
||||
|
||||
let output = String::from_utf8(cmd.stdout)
|
||||
.map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?;
|
||||
let tokens = output
|
||||
.split_whitespace()
|
||||
.take(idents.len())
|
||||
.map(str::to_owned)
|
||||
.collect::<Vec<_>>();
|
||||
if tokens.len() != idents.len() {
|
||||
return Err(Error::Extraction(ExtractionError::Botguard(
|
||||
"too few tokens returned".into(),
|
||||
)));
|
||||
}
|
||||
tracing::debug!("generated PO token");
|
||||
Ok(tokens)
|
||||
}
|
||||
|
||||
/// Get a PO token
|
||||
pub async fn get_po_token<S: AsRef<str>>(self, ident: S) -> Result<String, Error> {
|
||||
self.get_po_tokens(&[ident.as_ref()])
|
||||
.await
|
||||
.map(|res| res.into_iter().next().unwrap())
|
||||
}
|
||||
|
||||
async fn yt_request_attempt<R: DeserializeOwned + MapResponse<M> + Debug, M>(
|
||||
&self,
|
||||
request: &Request,
|
||||
|
|
@ -2128,6 +2235,7 @@ impl RustyPipeQuery {
|
|||
client_type: ctype,
|
||||
artist: ctx_src.artist,
|
||||
authenticated: self.opts.auth.unwrap_or_default(),
|
||||
session_po_token: ctx_src.session_po_token,
|
||||
};
|
||||
|
||||
let request = self
|
||||
|
|
@ -2284,6 +2392,7 @@ struct MapRespCtx<'a> {
|
|||
client_type: ClientType,
|
||||
artist: Option<ArtistId>,
|
||||
authenticated: bool,
|
||||
session_po_token: Option<&'a str>,
|
||||
}
|
||||
|
||||
/// Options to give to the mapper when making requests;
|
||||
|
|
@ -2294,6 +2403,7 @@ struct MapRespOptions<'a> {
|
|||
deobf: Option<&'a DeobfData>,
|
||||
artist: Option<ArtistId>,
|
||||
unlocalized: bool,
|
||||
session_po_token: Option<&'a str>,
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_lifetimes)]
|
||||
|
|
@ -2309,6 +2419,7 @@ impl<'a> MapRespCtx<'a> {
|
|||
client_type: ClientType::Desktop,
|
||||
artist: None,
|
||||
authenticated: false,
|
||||
session_po_token: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2370,11 +2481,23 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn get_visitor_data() {
|
||||
let rp = RustyPipe::new();
|
||||
let visitor_data = rp.get_visitor_data().await.unwrap();
|
||||
let visitor_data = rp.query().get_visitor_data(true).await.unwrap();
|
||||
|
||||
assert!(
|
||||
visitor_data.starts_with("Cg") && visitor_data.len() > 23,
|
||||
"invalid visitor data: {visitor_data}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn get_po_token() {
|
||||
let rp = RustyPipe::builder().build().unwrap();
|
||||
let ident = "Cgt4eDYyVVJveGQtbyiLyvu8BjIKCgJERRIEEgAgKw==";
|
||||
let po_token = rp.query().get_po_token(ident).await.unwrap();
|
||||
|
||||
let token_bts = data_encoding::BASE64URL
|
||||
.decode(po_token.as_bytes())
|
||||
.unwrap();
|
||||
assert_eq!(token_bts.len(), ident.len() + 74);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Reference in a new issue