diff --git a/src/client/mod.rs b/src/client/mod.rs index 2ef3a1f..f9462cb 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -38,6 +38,7 @@ use time::OffsetDateTime; use tokio::sync::RwLock as AsyncRwLock; use crate::error::AuthError; +use crate::util::VisitorDataCache; use crate::{ cache::{CacheStorage, FileStorage, DEFAULT_CACHE_FILE}, deobfuscate::DeobfData, @@ -312,9 +313,9 @@ impl AuthCookie { } } -const DEFAULT_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0"; -const MOBILE_UA: &str = "Mozilla/5.0 (Android 14; Mobile; rv:129.0) Gecko/129.0 Firefox/129.0"; -const TV_UA: &str = "Mozilla/5.0 (SMART-TV; Linux; Tizen 5.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/5.0 NativeTVAds Safari/538.1"; +pub(crate) const DEFAULT_UA: &str = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"; +pub(crate) const MOBILE_UA: &str = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.135 Mobile Safari/537.36"; +pub(crate) const TV_UA: &str = "Mozilla/5.0 (SMART-TV; Linux; Tizen 5.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/5.0 NativeTVAds Safari/538.1"; const CONSENT_COOKIE: &str = "SOCS=CAISAiAD"; @@ -323,7 +324,7 @@ const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1 const YOUTUBE_MUSIC_V1_URL: &str = "https://music.youtube.com/youtubei/v1/"; const YOUTUBEI_MOBILE_V1_URL: &str = "https://m.youtube.com/youtubei/v1/"; const YOUTUBE_HOME_URL: &str = "https://www.youtube.com"; -const YOUTUBE_MUSIC_HOME_URL: &str = "https://music.youtube.com"; +pub(crate) const YOUTUBE_MUSIC_HOME_URL: &str = "https://music.youtube.com"; const YOUTUBE_MOBILE_HOME_URL: &str = "https://m.youtube.com"; const YOUTUBE_TV_URL: &str = "https://www.youtube.com/tv"; @@ -350,8 +351,6 @@ const OAUTH_SCOPES: &str = "http://gdata.youtube.com https://www.googleapis.com/ static CLIENT_VERSION_REGEX: Lazy = Lazy::new(|| 
Regex::new(r#""INNERTUBE_CONTEXT_CLIENT_VERSION":"([\w\d\._-]+?)""#).unwrap()); -static VISITOR_DATA_REGEX: Lazy = - Lazy::new(|| Regex::new(r#""visitorData":"([\w\d_\-%]+?)""#).unwrap()); /// Default order of client types when fetching player data /// @@ -378,6 +377,7 @@ struct RustyPipeRef { cache: CacheHolder, default_opts: RustyPipeOpts, user_agent: Cow<'static, str>, + visitor_data_cache: VisitorDataCache, } #[derive(Clone)] @@ -688,6 +688,8 @@ impl RustyPipeBuilder { }) .collect::>(); + let visitor_data_cache = VisitorDataCache::new(http.clone()); + Ok(RustyPipe { inner: Arc::new(RustyPipeRef { http, @@ -706,6 +708,7 @@ impl RustyPipeBuilder { }, default_opts: self.default_opts, user_agent, + visitor_data_cache, }), }) } @@ -1196,51 +1199,7 @@ impl RustyPipe { /// Sometimes YouTube does not set the `__Secure-YEC` cookie. In this case, the /// visitor data is extracted from the html page. async fn get_visitor_data(&self) -> Result { - tracing::debug!("getting YT visitor data"); - let resp = self - .inner - .http - .get(YOUTUBE_MUSIC_HOME_URL) - .header(header::ORIGIN, YOUTUBE_MUSIC_HOME_URL) - .header(header::REFERER, YOUTUBE_MUSIC_HOME_URL) - .send() - .await?; - - let vdata = resp - .headers() - .get_all(header::SET_COOKIE) - .iter() - .find_map(|c| { - if let Ok(cookie) = c.to_str() { - if let Some(after) = cookie.strip_prefix("__Secure-YEC=") { - return after - .split_once(';') - .map(|s| s.0.to_owned()) - .filter(|s| !s.is_empty()); - } - } - None - }); - - match vdata { - Some(vdata) => Ok(vdata), - None => { - if resp.status().is_success() { - // Extract visitor data from html - let html = resp.text().await?; - - util::get_cg_from_regex(&VISITOR_DATA_REGEX, &html, 1).ok_or(Error::Extraction( - ExtractionError::InvalidData( - "Could not find visitor data on html page".into(), - ), - )) - } else { - Err(Error::Extraction(ExtractionError::InvalidData( - format!("Could not get visitor data, status: {}", resp.status()).into(), - ))) - } - } - } + 
self.inner.visitor_data_cache.new_visitor_data().await } /// Get a new device code for logging into YouTube @@ -2147,11 +2106,14 @@ impl RustyPipeQuery { ) -> Result { tracing::debug!("getting {}({})", operation, id); - let visitor_data = ctx_src + let visitor_data = match ctx_src .visitor_data .or(self.opts.visitor_data.as_deref()) .map(Cow::Borrowed) - .unwrap_or_else(|| util::random_visitor_data(self.opts.country).into()); + { + Some(vd) => vd, + None => self.client.inner.visitor_data_cache.get().await?.into(), + }; let context = self .get_context(ctype, !ctx_src.unlocalized, &visitor_data) @@ -2289,12 +2251,10 @@ impl RustyPipeQuery { endpoint: &str, body: &B, ) -> Result { - let visitor_data = self - .opts - .visitor_data - .as_deref() - .map(Cow::Borrowed) - .unwrap_or_else(|| util::random_visitor_data(self.opts.country).into()); + let visitor_data = match self.opts.visitor_data.as_deref().map(Cow::Borrowed) { + Some(vd) => vd, + None => self.client.inner.visitor_data_cache.get().await?.into(), + }; let context = self.get_context(ctype, true, &visitor_data).await; let req_body = QBody { context, body }; diff --git a/src/util/mod.rs b/src/util/mod.rs index fe294c7..9a49571 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,11 +1,13 @@ mod date; mod protobuf; +mod visitor_data; pub mod dictionary; pub mod timeago; pub use date::{now_sec, shift_months, shift_weeks_mo, shift_years}; pub use protobuf::{string_from_pb, ProtoBuilder}; +pub use visitor_data::VisitorDataCache; use std::{ collections::BTreeMap, @@ -99,29 +101,6 @@ pub fn random_uuid() -> String { ) } -/// Generate a random visitor data cookie -pub fn random_visitor_data(country: Country) -> String { - let mut rng = rand::thread_rng(); - - let mut pb_e2 = ProtoBuilder::new(); - pb_e2.string(2, ""); - pb_e2.varint(4, rng.gen_range(1..256)); - - let mut pb_e = ProtoBuilder::new(); - pb_e.string(1, &country.to_string()); - pb_e.embedded(2, pb_e2); - - let mut pb = ProtoBuilder::new(); - pb.string(1, 
&random_string(CONTENT_PLAYBACK_NONCE_ALPHABET, 11));
-    pb.varint(
-        5,
-        (time::OffsetDateTime::now_utc().unix_timestamp() as u64)
-            .saturating_sub(rng.gen_range(0..600_000)),
-    );
-    pb.embedded(6, pb_e);
-    pb.to_base64()
-}
-
 /// Split an URL into its base string and parameter map
 ///
 /// Example:
diff --git a/src/util/visitor_data.rs b/src/util/visitor_data.rs
new file mode 100644
index 0000000..ac2bd13
--- /dev/null
+++ b/src/util/visitor_data.rs
@@ -0,0 +1,211 @@
+use std::sync::{atomic::AtomicU32, Arc, RwLock};
+
+use once_cell::sync::Lazy;
+use rand::Rng;
+use regex::Regex;
+use reqwest::{header, Client};
+
+use crate::{
+    client::YOUTUBE_MUSIC_HOME_URL,
+    error::{Error, ExtractionError},
+    util,
+};
+
+/// To increase privacy and possibly circumvent rate limits, RustyPipe uses multiple
+/// visitor data IDs. These are held in this cache object.
+///
+/// On instantiation, the cache is empty, so for the first requests new visitor data IDs
+/// have to be requested. For subsequent requests a random ID from the cache is picked.
+/// After `REQ_LIMIT` requests, a new ID is requested asynchronously and added to the cache
+/// to prevent the IDs from being overused.
+///
+/// The cache holds a maximum of 100 visitor data IDs. If more are added, the oldest ones
+/// are evicted.
+#[derive(Clone)]
+pub struct VisitorDataCache {
+    inner: Arc<VisitorDataCacheRef>,
+}
+
+/// State shared between all clones of a [`VisitorDataCache`]
+struct VisitorDataCacheRef {
+    /// Number of `get` calls since the counter was last reset
+    req_counter: AtomicU32,
+    /// Cached visitor data IDs, oldest first
+    visitor_data: RwLock<Vec<String>>,
+    /// HTTP client used to request new visitor data
+    http: Client,
+}
+
+/// Matches the `visitorData` value embedded in the HTML page
+static VISITOR_DATA_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r#""visitorData":"([\w\d_\-%]+?)""#).unwrap());
+/// Number of requests after which a new ID is requested
+const REQ_LIMIT: u32 = 10;
+/// Number of IDs kept when a new one is added (maximum cache size - 1)
+const MAX_SIZE: usize = 99;
+
+impl VisitorDataCache {
+    /// Create an empty cache which requests visitor data using the given HTTP client
+    pub fn new(http: Client) -> Self {
+        Self {
+            inner: VisitorDataCacheRef {
+                req_counter: Default::default(),
+                visitor_data: Default::default(),
+                http,
+            }
+            .into(),
+        }
+    }
+
+    /// Request a new visitor data ID from the YouTube Music home page
+    ///
+    /// The ID is taken from the `__Secure-YEC` cookie. If the response does not set
+    /// that cookie, the ID is extracted from the HTML of a successful response.
+    async fn get_visitor_data(&self) -> Result<String, Error> {
+        tracing::debug!("getting YT visitor data");
+        let resp = self
+            .inner
+            .http
+            .get(YOUTUBE_MUSIC_HOME_URL)
+            .header(header::ORIGIN, YOUTUBE_MUSIC_HOME_URL)
+            .header(header::REFERER, YOUTUBE_MUSIC_HOME_URL)
+            .send()
+            .await?;
+
+        let vdata = resp
+            .headers()
+            .get_all(header::SET_COOKIE)
+            .iter()
+            .find_map(|c| {
+                if let Ok(cookie) = c.to_str() {
+                    if let Some(after) = cookie.strip_prefix("__Secure-YEC=") {
+                        return after
+                            .split_once(';')
+                            .map(|s| s.0.to_owned())
+                            .filter(|s| !s.is_empty());
+                    }
+                }
+                None
+            });
+
+        match vdata {
+            Some(vdata) => Ok(vdata),
+            None => {
+                if resp.status().is_success() {
+                    // Extract visitor data from html
+                    let html = resp.text().await?;
+
+                    util::get_cg_from_regex(&VISITOR_DATA_REGEX, &html, 1).ok_or(Error::Extraction(
+                        ExtractionError::InvalidData(
+                            "Could not find visitor data on html page".into(),
+                        ),
+                    ))
+                } else {
+                    Err(Error::Extraction(ExtractionError::InvalidData(
+                        format!("Could not get visitor data, status: {}", resp.status()).into(),
+                    )))
+                }
+            }
+        }
+    }
+
+    /// Request a new visitor data ID, add it to the cache and return it
+    ///
+    /// The request counter is reset before the ID is requested. If the cache holds
+    /// more than `MAX_SIZE` IDs, the oldest ones are evicted before the new ID is added.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the visitor data could not be fetched. In this case the
+    /// cache is not modified.
+    pub async fn new_visitor_data(&self) -> Result<String, Error> {
+        self.inner
+            .req_counter
+            .store(0, std::sync::atomic::Ordering::SeqCst);
+        // Propagate request errors to the caller instead of panicking
+        let vd = self.get_visitor_data().await?;
+        let mut vds = self.inner.visitor_data.write().unwrap();
+        let excess = vds.len().saturating_sub(MAX_SIZE);
+        for rem in vds.drain(..excess) {
+            tracing::debug!("visitor data {rem} removed from cache");
+        }
+        vds.push(vd.clone());
+        tracing::debug!("visitor data {} added to cache ({} ids)", vd, vds.len());
+        Ok(vd)
+    }
+
+    /// Get a visitor data ID
+    ///
+    /// A random ID is picked from the cache. If the cache is empty, a new ID is
+    /// requested instead. Once the request counter has reached `REQ_LIMIT`, an
+    /// additional ID is requested in a background task.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the cache is empty and no visitor data could be fetched.
+    pub async fn get(&self) -> Result<String, Error> {
+        // Request new visitor data in the background after REQ_LIMIT requests
+        if self
+            .inner
+            .req_counter
+            .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
+            >= REQ_LIMIT
+        {
+            let nc = self.clone();
+            tokio::spawn(async move {
+                // Nobody awaits this task, so log the error instead of dropping it
+                if let Err(e) = nc.new_visitor_data().await {
+                    tracing::warn!("could not refresh visitor data: {e}");
+                }
+            });
+        }
+
+        // Scoped so the read lock is released before the await below
+        {
+            let vds = self.inner.visitor_data.read().unwrap();
+            if !vds.is_empty() {
+                let mut rng = rand::thread_rng();
+                let vd = vds[rng.gen_range(0..vds.len())].to_owned();
+                tracing::debug!("visitor data {vd} picked from cache");
+                return Ok(vd);
+            }
+        }
+        // Fetch new visitor data if the cache is empty
+        self.new_visitor_data().await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::time::Duration;
+
+    use crate::client::DEFAULT_UA;
+
+    use super::*;
+
+    use tracing_test::traced_test;
+
+    #[tokio::test]
+    #[traced_test]
+    async fn get_visitor_data() {
+        let cache =
+            VisitorDataCache::new(Client::builder().user_agent(DEFAULT_UA).build().unwrap());
+        // Get initial visitor data
+        let v1 = cache.get().await.unwrap();
+
+        // Run as many requests as necessary to fetch second visitor data
+        for _ in 0..=REQ_LIMIT {
+            let got = cache.get().await.unwrap();
+            assert_eq!(got, v1);
+        }
+
+        // Second visitor data does not arrive instantly, request immediately after returns the first data
+        let vds_len = cache.inner.visitor_data.read().unwrap().len();
+        assert_eq!(vds_len, 1);
+
+        // Wait for the second visitor data to arrive
+        tokio::time::sleep(Duration::from_millis(1000)).await;
+        let vds_len = cache.inner.visitor_data.read().unwrap().len();
+        assert_eq!(vds_len, 2);
+    }
+}