feat: add support for rustypipe-botguard to get PO tokens

This commit is contained in:
ThetaDev 2025-02-03 02:41:17 +01:00
parent 92340056f8
commit b90a252a5e
No known key found for this signature in database
GPG key ID: E319D3C5148D65B6
16 changed files with 313 additions and 100 deletions

View file

@ -25,6 +25,7 @@ mod video_details;
mod channel_rss;
use std::collections::HashMap;
use std::ffi::OsString;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use std::{borrow::Cow, fmt::Debug, time::Duration};
@ -97,6 +98,13 @@ impl ClientType {
/// Returns `true` for every client type except [`ClientType::Ios`]
fn needs_deobf(self) -> bool {
    let is_ios = matches!(self, ClientType::Ios);
    !is_ios
}
fn needs_po_token(self) -> bool {
matches!(
self,
ClientType::Desktop | ClientType::DesktopMusic | ClientType::Mobile
)
}
}
/// YouTube context request parameter
@ -317,7 +325,7 @@ pub(crate) const DEFAULT_UA: &str = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit
pub(crate) const MOBILE_UA: &str = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.135 Mobile Safari/537.36";
pub(crate) const TV_UA: &str = "Mozilla/5.0 (SMART-TV; Linux; Tizen 5.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/5.0 NativeTVAds Safari/538.1";
const CONSENT_COOKIE: &str = "SOCS=CAISAiAD";
pub(crate) const CONSENT_COOKIE: &str = "SOCS=CAISAiAD";
const YOUTUBEI_V1_URL: &str = "https://www.youtube.com/youtubei/v1/";
const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1/";
@ -352,13 +360,6 @@ const OAUTH_SCOPES: &str = "http://gdata.youtube.com https://www.googleapis.com/
static CLIENT_VERSION_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#""INNERTUBE_CONTEXT_CLIENT_VERSION":"([\w\d\._-]+?)""#).unwrap());
/// Default order of client types when fetching player data
///
/// The order may change in the future in case YouTube applies changes to their
/// platform that disable a client or make it less reliable.
pub const DEFAULT_PLAYER_CLIENT_ORDER: &[ClientType] =
&[ClientType::Ios, ClientType::Tv, ClientType::Android];
/// The RustyPipe client used to access YouTube's API
///
/// RustyPipe uses an [`Arc`] internally, so if you are using the client
@ -378,6 +379,7 @@ struct RustyPipeRef {
default_opts: RustyPipeOpts,
user_agent: Cow<'static, str>,
visitor_data_cache: VisitorDataCache,
botguard: Option<BotguardCfg>,
}
#[derive(Clone)]
@ -399,6 +401,12 @@ pub struct RustyPipeBuilder {
user_agent: Option<String>,
default_opts: RustyPipeOpts,
storage_dir: Option<PathBuf>,
botguard_bin: DefaultOpt<OsString>,
}
/// Configuration for generating PO tokens with an external Botguard program
struct BotguardCfg {
    /// Program that is executed to generate PO tokens
    program: OsString,
    /// File path handed to the program via its `--snapshot-file` argument
    snapshot_file: PathBuf,
}
enum DefaultOpt<T> {
@ -415,6 +423,13 @@ impl<T> DefaultOpt<T> {
DefaultOpt::Default => Some(f()),
}
}
/// Resolve the option into an `Option<T>`
///
/// `Some` and `None` are mapped directly; the fallback `f` is only invoked for the
/// `Default` variant and may itself yield `None`.
fn or_default_opt<F>(self, f: F) -> Option<T>
where
    F: FnOnce() -> Option<T>,
{
    match self {
        DefaultOpt::Default => f(),
        DefaultOpt::None => None,
        DefaultOpt::Some(value) => Some(value),
    }
}
}
/// # RustyPipe query
@ -477,7 +492,7 @@ impl<T> DefaultOpt<T> {
///
/// ## Options
///
/// You can set the language, country and visitor data cookie for individual requests.
/// You can set the language, country and visitor data ID for individual requests.
///
/// ```
/// # use rustypipe::client::RustyPipe;
@ -626,6 +641,7 @@ impl RustyPipeBuilder {
n_http_retries: 2,
user_agent: None,
storage_dir: None,
botguard_bin: DefaultOpt::Default,
}
}
@ -690,12 +706,25 @@ impl RustyPipeBuilder {
let visitor_data_cache = VisitorDataCache::new(http.clone());
let botguard_bin = self.botguard_bin.or_default_opt(|| {
let n = OsString::from("rustypipe-botguard");
let out = std::process::Command::new(&n)
.arg("--version")
.output()
.ok()?;
if out.status.success() {
Some(n)
} else {
None
}
});
Ok(RustyPipe {
inner: Arc::new(RustyPipeRef {
http,
storage,
reporter: self.reporter.or_default(|| {
let mut report_dir = storage_dir;
let mut report_dir = storage_dir.clone();
report_dir.push(DEFAULT_REPORT_DIR);
Box::new(FileReporter::new(report_dir))
}),
@ -709,6 +738,14 @@ impl RustyPipeBuilder {
default_opts: self.default_opts,
user_agent,
visitor_data_cache,
botguard: botguard_bin.map(|program| {
let mut snapshot_file = storage_dir;
snapshot_file.push("bg_snapshot.bin");
BotguardCfg {
program,
snapshot_file,
}
}),
}),
})
}
@ -868,14 +905,14 @@ impl RustyPipeBuilder {
self
}
/// Set the YouTube visitor data cookie
/// Set the YouTube visitor data ID
///
/// YouTube assigns a session cookie to each user which is used for personalized
/// recommendations. By default, RustyPipe does not send this cookie to preserve
/// user privacy. For requests that require the cookie, a new one is requested
/// for every query.
///
/// This option allows you to manually set the visitor data cookie of your client,
/// This option allows you to manually set the visitor data ID of your client,
/// allowing you to get personalized recommendations or reproduce A/B tests.
///
/// Note that YouTube has a rate limit on the number of requests from a single
@ -888,7 +925,7 @@ impl RustyPipeBuilder {
self
}
/// Set the YouTube visitor data cookie to an optional value
/// Set the YouTube visitor data ID to an optional value
///
/// see also [`RustyPipeBuilder::visitor_data`]
///
@ -898,6 +935,26 @@ impl RustyPipeBuilder {
self.default_opts.visitor_data = visitor_data.map(S::into);
self
}
/// Disable RustyPipe Botguard
///
/// RustyPipe picks up the `rustypipe-botguard` binary by default if it is available.
/// Call this method if you want to use RustyPipe without Botguard.
pub fn no_botguard(self) -> Self {
    let mut builder = self;
    builder.botguard_bin = DefaultOpt::None;
    builder
}
/// Enable RustyPipe Botguard using the given binary
///
/// Generating PO tokens for accessing streams on browser-based clients requires Botguard.
/// Unless configured otherwise, RustyPipe uses the `rustypipe-botguard` binary if it is
/// available.
///
/// More information: <https://codeberg.org/ThetaDev/rustypipe-botguard>
pub fn botguard_bin<S: Into<OsString>>(self, botguard_bin: S) -> Self {
    let program: OsString = botguard_bin.into();
    let mut builder = self;
    builder.botguard_bin = DefaultOpt::Some(program);
    builder
}
}
impl Default for RustyPipe {
@ -1191,17 +1248,6 @@ impl RustyPipe {
}
}
/// Request a new visitor data cookie from YouTube
///
/// Since the cookie is shared between YT and YTM and the YTM page loads faster,
/// we request that.
///
/// Sometimes YouTube does not set the `__Secure-YEC` cookie. In this case, the
/// visitor data is extracted from the html page.
async fn get_visitor_data(&self) -> Result<String, Error> {
self.inner.visitor_data_cache.new_visitor_data().await
}
/// Get a new device code for logging into YouTube
pub async fn user_auth_get_code(&self) -> Result<OauthDeviceCode, Error> {
tracing::debug!("getting OAuth user code");
@ -1618,14 +1664,14 @@ impl RustyPipeQuery {
self
}
/// Set the YouTube visitor data cookie
/// Set the YouTube visitor data ID
///
/// YouTube assigns a session cookie to each user which is used for personalized
/// recommendations. By default, RustyPipe does not send this cookie to preserve
/// user privacy. For requests that require the cookie, a new one is requested
/// for every query.
///
/// This option allows you to manually set the visitor data cookie of your query,
/// This option allows you to manually set the visitor data ID of your query,
/// allowing you to get personalized recommendations or reproduce A/B tests.
///
/// Note that YouTube has a rate limit on the number of requests from a single
@ -1636,7 +1682,7 @@ impl RustyPipeQuery {
self
}
/// Set the YouTube visitor data cookie to an optional value
/// Set the YouTube visitor data ID to an optional value
///
/// see also [`RustyPipeQuery::visitor_data`]
#[must_use]
@ -1845,7 +1891,7 @@ impl RustyPipeQuery {
/// - `ctype`: Client type (`Desktop`, `DesktopMusic`, `Android`, ...)
/// - `method`: HTTP method
/// - `endpoint`: YouTube API endpoint (`https://www.youtube.com/youtubei/v1/<XYZ>?key=...`)
/// - `visitor_data`: YouTube visitor data cookie
/// - `visitor_data`: YouTube visitor data ID
async fn request_builder(
&self,
ctype: ClientType,
@ -1987,14 +2033,75 @@ impl RustyPipeQuery {
Some(format!("SAPISIDHASH {time_now}_{sapisidhash_hex}"))
}
/// Get a YouTube visitor data cookie, which is necessary for certain requests
pub async fn get_visitor_data(&self) -> Result<String, Error> {
/// Get a YouTube visitor data ID, which is necessary for certain requests
pub async fn get_visitor_data(&self, force_new: bool) -> Result<String, Error> {
if force_new {
return self
.client
.inner
.visitor_data_cache
.new_visitor_data()
.await;
}
match &self.opts.visitor_data {
Some(vd) => Ok(vd.clone()),
None => self.client.get_visitor_data().await,
None => self.client.inner.visitor_data_cache.get().await,
}
}
/// Drop a YouTube visitor data ID from the client's cache so it is not handed out again
pub fn remove_visitor_data(&self, visitor_data: &str) {
    let cache = &self.client.inner.visitor_data_cache;
    cache.remove(visitor_data);
}
/// Get PO tokens
async fn get_po_tokens(&self, idents: &[&str]) -> Result<Vec<String>, Error> {
let bg = self
.client
.inner
.botguard
.as_ref()
.ok_or(Error::Extraction(ExtractionError::Botguard(
"not enabled".into(),
)))?;
let cmd = tokio::process::Command::new(&bg.program)
.arg("--snapshot-file")
.arg(&bg.snapshot_file)
.arg("--")
.args(idents)
.output()
.await
.map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?;
if !cmd.status.success() {
return Err(Error::Extraction(ExtractionError::Botguard(
String::from_utf8_lossy(&cmd.stderr).into_owned().into(),
)));
}
let output = String::from_utf8(cmd.stdout)
.map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?;
let tokens = output
.split_whitespace()
.take(idents.len())
.map(str::to_owned)
.collect::<Vec<_>>();
if tokens.len() != idents.len() {
return Err(Error::Extraction(ExtractionError::Botguard(
"too few tokens returned".into(),
)));
}
tracing::debug!("generated PO token");
Ok(tokens)
}
/// Get a PO token for the given identifier
///
/// Returns an error if the token could not be generated (e.g. Botguard is not enabled).
pub async fn get_po_token<S: AsRef<str>>(self, ident: S) -> Result<String, Error> {
    let tokens = self.get_po_tokens(&[ident.as_ref()]).await?;
    // get_po_tokens only succeeds if it got one token per identifier,
    // so there is exactly one token here
    Ok(tokens.into_iter().next().unwrap())
}
async fn yt_request_attempt<R: DeserializeOwned + MapResponse<M> + Debug, M>(
&self,
request: &Request,
@ -2128,6 +2235,7 @@ impl RustyPipeQuery {
client_type: ctype,
artist: ctx_src.artist,
authenticated: self.opts.auth.unwrap_or_default(),
session_po_token: ctx_src.session_po_token,
};
let request = self
@ -2284,6 +2392,7 @@ struct MapRespCtx<'a> {
client_type: ClientType,
artist: Option<ArtistId>,
authenticated: bool,
session_po_token: Option<&'a str>,
}
/// Options to give to the mapper when making requests;
@ -2294,6 +2403,7 @@ struct MapRespOptions<'a> {
deobf: Option<&'a DeobfData>,
artist: Option<ArtistId>,
unlocalized: bool,
session_po_token: Option<&'a str>,
}
#[allow(clippy::needless_lifetimes)]
@ -2309,6 +2419,7 @@ impl<'a> MapRespCtx<'a> {
client_type: ClientType::Desktop,
artist: None,
authenticated: false,
session_po_token: None,
}
}
}
@ -2370,11 +2481,23 @@ mod tests {
#[tokio::test]
async fn get_visitor_data() {
let rp = RustyPipe::new();
let visitor_data = rp.get_visitor_data().await.unwrap();
let visitor_data = rp.query().get_visitor_data(true).await.unwrap();
assert!(
visitor_data.starts_with("Cg") && visitor_data.len() > 23,
"invalid visitor data: {visitor_data}"
);
}
#[tokio::test]
async fn get_po_token() {
    let ident = "Cgt4eDYyVVJveGQtbyiLyvu8BjIKCgJERRIEEgAgKw==";
    let client = RustyPipe::builder().build().unwrap();

    let token = client.query().get_po_token(ident).await.unwrap();
    let decoded = data_encoding::BASE64URL.decode(token.as_bytes()).unwrap();

    // The decoded token is expected to be 74 bytes longer than its identifier
    assert_eq!(decoded.len(), ident.len() + 74);
}
}

View file

@ -26,7 +26,6 @@ use super::{
player::{self, Format},
},
ClientType, MapRespCtx, MapRespOptions, MapResponse, MapResult, RustyPipeQuery,
DEFAULT_PLAYER_CLIENT_ORDER,
};
#[derive(Debug, Serialize)]
@ -41,6 +40,9 @@ struct QPlayer<'a> {
content_check_ok: bool,
/// Probably refers to allowing sensitive content, too
racy_check_ok: bool,
/// Botguard data
#[serde(skip_serializing_if = "Option::is_none")]
service_integrity_dimensions: Option<ServiceIntegrity>,
}
#[derive(Debug, Serialize)]
@ -70,10 +72,16 @@ struct QDrmLicense<'a> {
drm_video_feature: &'a str,
}
/// Botguard data attached to a player request
/// (value of the optional `service_integrity_dimensions` request field)
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct ServiceIntegrity {
    /// PO token sent with the request (serialized as `poToken`)
    po_token: String,
}
impl RustyPipeQuery {
/// Get YouTube player data (video/audio streams + basic metadata)
pub async fn player<S: AsRef<str> + Debug>(&self, video_id: S) -> Result<VideoPlayer, Error> {
self.player_from_clients(video_id, DEFAULT_PLAYER_CLIENT_ORDER)
self.player_from_clients(video_id, self.player_client_order())
.await
}
@ -142,28 +150,46 @@ impl RustyPipeQuery {
client_type: ClientType,
) -> Result<VideoPlayer, Error> {
let video_id = video_id.as_ref();
let mut deobf = None;
let request_body = if client_type.needs_deobf() {
deobf = Some(self.client.get_deobf_data().await?);
QPlayer {
playback_context: Some(QPlaybackContext {
content_playback_context: QContentPlaybackContext {
signature_timestamp: &deobf.as_ref().unwrap().sts,
referer: format!("https://www.youtube.com/watch?v={video_id}"),
},
}),
video_id,
content_check_ok: true,
racy_check_ok: true,
}
} else {
QPlayer {
playback_context: None,
video_id,
content_check_ok: true,
racy_check_ok: true,
let visitor_data = self.get_visitor_data(false).await?;
let (deobf, (service_integrity_dimensions, session_po_token)) = tokio::try_join!(
async {
if client_type.needs_deobf() {
Ok::<_, Error>(Some(self.client.get_deobf_data().await?))
} else {
Ok(None)
}
},
async {
if client_type.needs_po_token() {
let mut po_tokens = self
.get_po_tokens(&[video_id, &visitor_data])
.await?
.into_iter();
let po_token = po_tokens.next().unwrap();
let session_po_token = po_tokens.next().unwrap();
Ok((Some(ServiceIntegrity { po_token }), Some(session_po_token)))
} else {
Ok((None, None))
}
}
)?;
let playback_context = deobf.as_ref().map(|deobf| QPlaybackContext {
content_playback_context: QContentPlaybackContext {
signature_timestamp: &deobf.sts,
referer: format!("https://www.youtube.com/watch?v={video_id}"),
},
});
let request_body = QPlayer {
playback_context,
video_id,
content_check_ok: true,
racy_check_ok: true,
service_integrity_dimensions,
};
self.execute_request_ctx::<response::Player, _, _>(
@ -173,14 +199,28 @@ impl RustyPipeQuery {
"player",
&request_body,
MapRespOptions {
visitor_data: Some(&visitor_data),
deobf: deobf.as_ref(),
unlocalized: true,
session_po_token: session_po_token.as_deref(),
..Default::default()
},
)
.await
}
/// Get the default order of client types when fetching player data
///
/// The `Desktop` client is tried first if Botguard is configured; without Botguard
/// it is left out of the list.
///
/// The order may change in the future in case YouTube applies changes to their
/// platform that disable a client or make it less reliable.
pub fn player_client_order(&self) -> &'static [ClientType] {
    const WITH_BOTGUARD: &[ClientType] =
        &[ClientType::Desktop, ClientType::Ios, ClientType::Tv];
    const WITHOUT_BOTGUARD: &[ClientType] = &[ClientType::Ios, ClientType::Tv];

    let botguard_enabled = self.client.inner.botguard.is_some();
    if botguard_enabled {
        WITH_BOTGUARD
    } else {
        WITHOUT_BOTGUARD
    }
}
/// Get a license to play back DRM protected videos
///
/// Requires authentication (either via OAuth or cookies).
@ -250,6 +290,7 @@ impl MapResponse<VideoPlayer> for response::Player {
"country" => Some(UnavailabilityReason::Geoblocked),
"version" | "websites" => Some(UnavailabilityReason::UnsupportedClient),
"bot" => Some(UnavailabilityReason::IpBan),
"later." => Some(UnavailabilityReason::TryAgain),
_ => None,
})
.unwrap_or_default();
@ -327,7 +368,7 @@ impl MapResponse<VideoPlayer> for response::Player {
};
let streams = if !is_live {
let mut mapper = StreamsMapper::new(ctx.deobf)?;
let mut mapper = StreamsMapper::new(ctx.deobf, ctx.session_po_token)?;
mapper.map_streams(streaming_data.formats);
mapper.map_streams(streaming_data.adaptive_formats);
let mut res = mapper.output()?;
@ -442,8 +483,9 @@ impl MapResponse<VideoPlayer> for response::Player {
}
}
struct StreamsMapper {
struct StreamsMapper<'a> {
deobf: Option<Deobfuscator>,
session_po_token: Option<&'a str>,
streams: Streams,
warnings: Vec<String>,
/// First stream mapping error
@ -461,8 +503,11 @@ struct Streams {
audio_streams: Vec<AudioStream>,
}
impl StreamsMapper {
fn new(deobf_data: Option<&DeobfData>) -> Result<Self, DeobfError> {
impl<'a> StreamsMapper<'a> {
fn new(
deobf_data: Option<&DeobfData>,
session_po_token: Option<&'a str>,
) -> Result<Self, DeobfError> {
let deobf = match deobf_data {
Some(deobf_data) => Some(Deobfuscator::new(deobf_data)?),
None => None,
@ -470,6 +515,7 @@ impl StreamsMapper {
Ok(Self {
deobf,
session_po_token,
streams: Streams::default(),
warnings: Vec::new(),
first_err: None,
@ -609,6 +655,10 @@ impl StreamsMapper {
}?;
self.deobf_nsig(&mut url_params)?;
if let Some(pot) = self.session_po_token {
url_params.insert("pot".to_owned(), pot.to_owned());
}
let url = Url::parse_with_params(url_base.as_str(), url_params.iter())
.map_err(|_| ExtractionError::InvalidData("could not combine URL".into()))?;
@ -880,6 +930,7 @@ mod tests {
client_type,
artist: None,
authenticated: false,
session_po_token: None,
})
.unwrap();
@ -905,7 +956,7 @@ mod tests {
#[test]
fn cipher_to_url() {
let signature_cipher = "s=w%3DAe%3DA6aDNQLkViKS7LOm9QtxZJHKwb53riq9qEFw-ecBWJCAiA%3DcEg0tn3dty9jEHszfzh4Ud__bg9CEHVx4ix-7dKsIPAhIQRw8JQ0qOA&sp=sig&url=https://rr5---sn-h0jelnez.googlevideo.com/videoplayback%3Fexpire%3D1659376413%26ei%3Dvb7nYvH5BMK8gAfBj7ToBQ%26ip%3D2003%253Ade%253Aaf06%253A6300%253Ac750%253A1b77%253Ac74a%253A80e3%26id%3Do-AB_BABwrXZJN428ZwDxq5ScPn2AbcGODnRlTVhCQ3mj2%26itag%3D251%26source%3Dyoutube%26requiressl%3Dyes%26mh%3DhH%26mm%3D31%252C26%26mn%3Dsn-h0jelnez%252Csn-4g5ednsl%26ms%3Dau%252Conr%26mv%3Dm%26mvi%3D5%26pl%3D37%26initcwndbps%3D1588750%26spc%3DlT-Khi831z8dTejFIRCvCEwx_6romtM%26vprv%3D1%26mime%3Daudio%252Fwebm%26ns%3Db_Mq_qlTFcSGlG9RpwpM9xQH%26gir%3Dyes%26clen%3D3781277%26dur%3D229.301%26lmt%3D1655510291473933%26mt%3D1659354538%26fvip%3D5%26keepalive%3Dyes%26fexp%3D24001373%252C24007246%26c%3DWEB%26rbqsm%3Dfr%26txp%3D4532434%26n%3Dd2g6G2hVqWIXxedQ%26sparams%3Dexpire%252Cei%252Cip%252Cid%252Citag%252Csource%252Crequiressl%252Cspc%252Cvprv%252Cmime%252Cns%252Cgir%252Cclen%252Cdur%252Clmt%26lsparams%3Dmh%252Cmm%252Cmn%252Cms%252Cmv%252Cmvi%252Cpl%252Cinitcwndbps%26lsig%3DAG3C_xAwRQIgCKCGJ1iu4wlaGXy3jcJyU3inh9dr1FIfqYOZEG_MdmACIQCbungkQYFk7EhD6K2YvLaHFMjKOFWjw001_tLb0lPDtg%253D%253D";
let mut mapper = StreamsMapper::new(Some(&DEOBF_DATA)).unwrap();
let mut mapper = StreamsMapper::new(Some(&DEOBF_DATA), None).unwrap();
let url = mapper
.map_url(&None, &Some(signature_cipher.to_owned()))
.unwrap()

View file

@ -58,6 +58,9 @@ pub enum ExtractionError {
/// Error deobfuscating YouTube's URL signatures
#[error("deobfuscation error: {0}")]
Deobfuscation(Cow<'static, str>),
/// Error generating Botguard tokens
#[error("botguard error: {0}")]
Botguard(Cow<'static, str>),
/// YouTube returned data that does not match the queried ID
///
/// Specifically YouTube may return this video <https://www.youtube.com/watch?v=aQvGIIdgFDM>,
@ -102,6 +105,8 @@ pub enum UnavailabilityReason {
OfflineLivestream,
/// YouTube banned your IP address from accessing the platform without an account
IpBan,
/// Video temporarily unavailable (rate limit)
TryAgain,
/// Video can't be played for other reasons
#[default]
Unplayable,
@ -120,6 +125,7 @@ impl Display for UnavailabilityReason {
UnavailabilityReason::MembersOnly => f.write_str("members-only"),
UnavailabilityReason::OfflineLivestream => f.write_str("offline stream"),
UnavailabilityReason::IpBan => f.write_str("ip-ban"),
UnavailabilityReason::TryAgain => f.write_str("try again"),
UnavailabilityReason::Unplayable => f.write_str("unplayable"),
}
}
@ -220,7 +226,13 @@ impl Error {
Ok(status) => status.is_server_error() || status == StatusCode::TOO_MANY_REQUESTS,
Err(_) => false,
},
Self::Extraction(ExtractionError::InvalidData(_)) => true,
Self::Extraction(
ExtractionError::InvalidData(_)
| ExtractionError::Unavailable {
reason: UnavailabilityReason::TryAgain,
..
},
) => true,
_ => false,
}
}
@ -232,9 +244,10 @@ impl ExtractionError {
matches!(
self,
ExtractionError::Unavailable {
reason: UnavailabilityReason::UnsupportedClient,
reason: UnavailabilityReason::UnsupportedClient | UnavailabilityReason::TryAgain,
..
} | ExtractionError::WrongResult(_)
| ExtractionError::Botguard(_)
)
}

View file

@ -149,7 +149,7 @@ pub struct VideoPlayer {
pub drm: Option<VideoPlayerDrm>,
/// Client type with which the player was fetched
pub client_type: ClientType,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
}
@ -615,7 +615,7 @@ pub struct Playlist {
pub last_update: Option<Date>,
/// Textual last update date
pub last_update_txt: Option<String>,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
}
@ -683,7 +683,7 @@ pub struct VideoDetails {
///
/// Is initially empty.
pub latest_comments: Paginator<Comment>,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
}
@ -820,7 +820,7 @@ pub struct Channel<T> {
pub has_shorts: bool,
/// Does the channel have a *Live* tab?
pub has_live: bool,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
/// Content fetched from the channel
pub content: T,
@ -905,7 +905,7 @@ pub struct SearchResult<T> {
/// for the corrected search term and displays it on top of the
/// search results page.
pub corrected_query: Option<String>,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
}

View file

@ -6,7 +6,7 @@ use regex::Regex;
use reqwest::{header, Client};
use crate::{
client::YOUTUBE_MUSIC_HOME_URL,
client::{CONSENT_COOKIE, YOUTUBE_MUSIC_HOME_URL},
error::{Error, ExtractionError},
util,
};
@ -35,9 +35,9 @@ struct VisitorDataCacheRef {
static VISITOR_DATA_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#""visitorData":"([\w\d_\-%]+?)""#).unwrap());
/// Number of requests after which a new token is requested
const REQ_LIMIT: u32 = 10;
/// Maximum size of the cache (-1)
const MAX_SIZE: usize = 99;
const REQ_LIMIT: u32 = 50;
/// Maximum size of the cache
const MAX_SIZE: usize = 20;
impl VisitorDataCache {
pub fn new(http: Client) -> Self {
@ -59,6 +59,7 @@ impl VisitorDataCache {
.get(YOUTUBE_MUSIC_HOME_URL)
.header(header::ORIGIN, YOUTUBE_MUSIC_HOME_URL)
.header(header::REFERER, YOUTUBE_MUSIC_HOME_URL)
.header(header::COOKIE, CONSENT_COOKIE)
.send()
.await?;
@ -100,10 +101,11 @@ impl VisitorDataCache {
}
pub async fn new_visitor_data(&self) -> Result<String, Error> {
let vd = self.get_visitor_data().await.unwrap();
self.inner
.req_counter
.store(0, std::sync::atomic::Ordering::SeqCst);
let vd = self.get_visitor_data().await.unwrap();
.store(0, std::sync::atomic::Ordering::Relaxed);
let mut vds = self.inner.visitor_data.write().unwrap();
for _ in 0..(vds.len().saturating_sub(MAX_SIZE)) {
let rem = vds.remove(0);
@ -119,9 +121,12 @@ impl VisitorDataCache {
if self
.inner
.req_counter
.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
>= REQ_LIMIT
{
self.inner
.req_counter
.store(0, std::sync::atomic::Ordering::Relaxed);
let nc = self.clone();
tokio::spawn(async move { nc.new_visitor_data().await });
}
@ -138,6 +143,14 @@ impl VisitorDataCache {
// Fetch new visitor data if the cache is empty
self.new_visitor_data().await
}
/// Remove the given visitor data ID from the cache
///
/// Only the first matching entry is removed; nothing happens if the ID is not cached.
pub fn remove(&self, visitor_data: &str) {
    let mut cached = self.inner.visitor_data.write().unwrap();
    let found = cached.iter().position(|vd| vd == visitor_data);
    if let Some(idx) = found {
        cached.remove(idx);
        tracing::debug!("visitor data {visitor_data} removed from cache");
    }
}
}
#[cfg(test)]