diff --git a/.forgejo/workflows/ci.yaml b/.forgejo/workflows/ci.yaml index 3f6ec46..4598aa8 100644 --- a/.forgejo/workflows/ci.yaml +++ b/.forgejo/workflows/ci.yaml @@ -24,6 +24,13 @@ jobs: with: cache-on-failure: "true" + - name: Download rustypipe-botguard + run: | + TARGET=$(rustc --version --verbose | grep "host:" | sed -e 's/^host: //') + curl -SsL -o rustypipe-botguard.gz "https://thetadev.de/dl/rustypipe-botguard-${TARGET}.gz" + sudo sh -c 'gunzip -c rustypipe-botguard.gz > /usr/local/bin/rustypipe-botguard' + sudo chmod +x /usr/local/bin/rustypipe-botguard + - name: 📎 Clippy run: cargo clippy --all --tests --features=rss,indicatif,audiotag -- -D warnings diff --git a/.gitignore b/.gitignore index 3a5faf8..3e0f26a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ rustypipe_reports rustypipe_cache*.json +bg_snapshot.bin diff --git a/Cargo.toml b/Cargo.toml index 7289aa7..efc8ce3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -99,7 +99,7 @@ fancy-regex.workspace = true thiserror.workspace = true url.workspace = true reqwest = { workspace = true, features = ["json", "gzip", "brotli"] } -tokio = { workspace = true, features = ["macros", "time"] } +tokio = { workspace = true, features = ["macros", "time", "process"] } serde.workspace = true serde_json.workspace = true serde_with.workspace = true diff --git a/cli/README.md b/cli/README.md index d69164b..1a6513d 100644 --- a/cli/README.md +++ b/cli/README.md @@ -75,7 +75,7 @@ videos can be downloaded in parallel for improved performance. ## `vdata`: Get visitor data -You can use the vdata command to get a new visitor data cookie. This feature may come in +You can use the vdata command to get a new visitor data ID. This feature may come in handy for testing and reproducing A/B tests. ## `releases` Get YouTube Music new releases @@ -130,7 +130,7 @@ Fetch a list of all the items saved in your YouTube/YouTube Music profile. 
and `ALL_PROXY` - **Logging:** You can change the log level with the `RUST_LOG` environment variable, it is set to `info` by default -- **Visitor data:** A custom visitor data cookie can be used with the `--vdata` flag +- **Visitor data:** A custom visitor data ID can be used with the `--vdata` flag - **Authentication:** Use the commands `rustypipe login` and `rustypipe login --cookie` to log into your Google account using either OAuth or YouTube cookies. With the `--auth` flag you can use authentication for any request. diff --git a/cli/src/main.rs b/cli/src/main.rs index 8c9a41e..c3dd816 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -19,8 +19,8 @@ use rustypipe::{ model::{ richtext::{RichText, ToPlaintext}, traits::YtEntity, - ArtistId, Comment, MusicSearchResult, TrackItem, TrackType, UrlTarget, Verification, - YouTubeItem, + ArtistId, AudioCodec, Comment, MusicSearchResult, TrackItem, TrackType, UrlTarget, + Verification, YouTubeItem, }, param::{search_filter, ChannelVideoTab, Country, Language, StreamFilter}, report::FileReporter, @@ -45,7 +45,7 @@ struct Cli { /// Always generate a report (used for debugging) #[clap(long, global = true)] report: bool, - /// YouTube visitor data cookie + /// YouTube visitor data ID #[clap(long, global = true)] vdata: Option, /// YouTube content language @@ -299,7 +299,7 @@ enum Commands { #[clap(long)] pretty: bool, }, - /// Get a YouTube visitor data cookie + /// Get a YouTube visitor data ID Vdata, /// Log in using your Google account Login { @@ -925,7 +925,9 @@ async fn run() -> anyhow::Result<()> { let mut filter = StreamFilter::new(); if let Some(res) = resolution { if res == 0 { - filter = filter.no_video(); + filter = filter + .no_video() + .audio_codecs([AudioCodec::Mp4a, AudioCodec::Opus]); } else { filter = filter.video_max_res(res); } @@ -1716,7 +1718,7 @@ async fn run() -> anyhow::Result<()> { } } Commands::Vdata => { - let vd = rp.query().get_visitor_data().await?; + let vd = 
rp.query().get_visitor_data(true).await?; println!("{vd}"); } Commands::Login { diff --git a/codegen/src/abtest.rs b/codegen/src/abtest.rs index 22c0b0a..91aaf5e 100644 --- a/codegen/src/abtest.rs +++ b/codegen/src/abtest.rs @@ -94,7 +94,7 @@ pub async fn run_test( let rp = rp.clone(); let pb = pb.clone(); async move { - let visitor_data = rp.query().get_visitor_data().await.unwrap(); + let visitor_data = rp.query().get_visitor_data(true).await.unwrap(); let query = rp.query().visitor_data(&visitor_data); let is_present = match ab { ABTest::AttributedTextDescription => attributed_text_description(&query).await, diff --git a/downloader/src/error.rs b/downloader/src/error.rs index 9e9d84c..c9ad088 100644 --- a/downloader/src/error.rs +++ b/downloader/src/error.rs @@ -13,8 +13,8 @@ pub enum DownloadError { #[error("http error: {0}")] Http(#[from] reqwest::Error), /// 403 error trying to download video - #[error("YouTube returned 403 error")] - Forbidden(ClientType), + #[error("YouTube returned 403 error; visitor_data={}", .1.as_deref().unwrap_or_default())] + Forbidden(ClientType, Option), /// File IO error #[error(transparent)] Io(#[from] std::io::Error), diff --git a/downloader/src/lib.rs b/downloader/src/lib.rs index 6bf238d..29f5469 100644 --- a/downloader/src/lib.rs +++ b/downloader/src/lib.rs @@ -21,7 +21,7 @@ use rand::Rng; use regex::Regex; use reqwest::{header, Client, StatusCode, Url}; use rustypipe::{ - client::{ClientType, RustyPipe, DEFAULT_PLAYER_CLIENT_ORDER}, + client::{ClientType, RustyPipe}, model::{ traits::{FileFormat, YtEntity}, AudioCodec, TrackItem, VideoCodec, VideoPlayer, @@ -698,9 +698,9 @@ impl DownloadQuery { .await { Ok(res) => return Ok(res), - Err(DownloadError::Forbidden(c)) => { + Err(DownloadError::Forbidden(c, vd)) => { failed_client = Some(c); - DownloadError::Forbidden(c) + DownloadError::Forbidden(c, vd) } Err(DownloadError::Http(e)) => { if !e.is_timeout() { @@ -770,7 +770,7 @@ impl DownloadQuery { .as_ref() 
.or(self.dl.i.client_types.as_ref()) .map(Vec::as_slice) - .unwrap_or(DEFAULT_PLAYER_CLIENT_ORDER), + .unwrap_or(q.player_client_order()), ); // If the last download failed, try another client if possible @@ -885,7 +885,14 @@ impl DownloadQuery { .map_err(|e| { if let DownloadError::Http(e) = &e { if e.status() == Some(StatusCode::FORBIDDEN) { - return DownloadError::Forbidden(player_data.client_type); + // 403 errors may occur due to bad visitor data IDs + if let Some(vd) = &player_data.visitor_data { + q.remove_visitor_data(vd); + } + return DownloadError::Forbidden( + player_data.client_type, + player_data.visitor_data.clone(), + ); } } e @@ -1410,7 +1417,6 @@ async fn download_chunks_by_param( )); } - tracing::debug!("Retrieving chunks..."); let mut stream = res.bytes_stream(); while let Some(item) = stream.next().await { // Retrieve chunk. diff --git a/notes/AB_Tests.md b/notes/AB_Tests.md index 0a73315..c85156a 100644 --- a/notes/AB_Tests.md +++ b/notes/AB_Tests.md @@ -3,12 +3,12 @@ When YouTube introduces a new feature, it does so gradually. When a user creates a new session, YouTube decided randomly which new features should be enabled. -YouTube sessions are identified by the visitor data cookie. This cookie is sent with +YouTube sessions are identified by the visitor data ID. This ID is sent with every API request using the `context.client.visitor_data` JSON parameter. It is also returned in the `responseContext.visitorData` response parameter and stored as the `__SECURE-YEC` cookie. -By sending the same visitor data cookie, A/B tests can be reproduced, which is important +By sending the same visitor data ID, A/B tests can be reproduced, which is important for testing alternative YouTube clients. This page lists all A/B tests that were encountered while maintaining the RustyPipe @@ -381,7 +381,7 @@ YouTube also changed the way the full discography page is fetched, surprisingly it easier for alternative clients. 
The discography page now has its own content ID in the format of `MPAD` (Music Page Artist Discography). This page can be fetched with a regular browse request without requiring parameters to be parsed or a -visitor data cookie to be set, as it was the case with the old system. +visitor data ID to be set, as it was the case with the old system. **OLD** diff --git a/notes/po_token.md b/notes/po_token.md index 26064e6..f85ec16 100644 --- a/notes/po_token.md +++ b/notes/po_token.md @@ -16,7 +16,7 @@ The pot token is base64-formatted and usually starts with a M `MnToZ2brHmyo0ehfKtK_EWUq60dPYDXksNX_UsaniM_Uj6zbtiIZujCHY02hr7opxB_n3XHetJQCBV9cnNHovuhvDqrjfxsKR-sjn-eIxqv3qOZKphvyDpQzlYBnT2AXK41R-ti6iPonrvlvKIASNmYX2lhsEg==` -The token is generated from YouTubes Botguard script. The token is bound to the visitor data cookie +The token is generated from YouTube's Botguard script. The token is bound to the visitor data ID used to fetch the player data. This feature has been A/B-tested for a few weeks. 
During that time, refetching the player in case diff --git a/src/client/mod.rs b/src/client/mod.rs index f9462cb..07817c0 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -25,6 +25,7 @@ mod video_details; mod channel_rss; use std::collections::HashMap; +use std::ffi::OsString; use std::path::PathBuf; use std::sync::{Arc, RwLock}; use std::{borrow::Cow, fmt::Debug, time::Duration}; @@ -97,6 +98,13 @@ impl ClientType { fn needs_deobf(self) -> bool { !matches!(self, ClientType::Ios) } + + fn needs_po_token(self) -> bool { + matches!( + self, + ClientType::Desktop | ClientType::DesktopMusic | ClientType::Mobile + ) + } } /// YouTube context request parameter @@ -317,7 +325,7 @@ pub(crate) const DEFAULT_UA: &str = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit pub(crate) const MOBILE_UA: &str = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.135 Mobile Safari/537.36"; pub(crate) const TV_UA: &str = "Mozilla/5.0 (SMART-TV; Linux; Tizen 5.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/5.0 NativeTVAds Safari/538.1"; -const CONSENT_COOKIE: &str = "SOCS=CAISAiAD"; +pub(crate) const CONSENT_COOKIE: &str = "SOCS=CAISAiAD"; const YOUTUBEI_V1_URL: &str = "https://www.youtube.com/youtubei/v1/"; const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1/"; @@ -352,13 +360,6 @@ const OAUTH_SCOPES: &str = "http://gdata.youtube.com https://www.googleapis.com/ static CLIENT_VERSION_REGEX: Lazy = Lazy::new(|| Regex::new(r#""INNERTUBE_CONTEXT_CLIENT_VERSION":"([\w\d\._-]+?)""#).unwrap()); -/// Default order of client types when fetching player data -/// -/// The order may change in the future in case YouTube applies changes to their -/// platform that disable a client or make it less reliable. 
-pub const DEFAULT_PLAYER_CLIENT_ORDER: &[ClientType] = - &[ClientType::Ios, ClientType::Tv, ClientType::Android]; - /// The RustyPipe client used to access YouTube's API /// /// RustyPipe uses an [`Arc`] internally, so if you are using the client @@ -378,6 +379,7 @@ struct RustyPipeRef { default_opts: RustyPipeOpts, user_agent: Cow<'static, str>, visitor_data_cache: VisitorDataCache, + botguard: Option, } #[derive(Clone)] @@ -399,6 +401,12 @@ pub struct RustyPipeBuilder { user_agent: Option, default_opts: RustyPipeOpts, storage_dir: Option, + botguard_bin: DefaultOpt, +} + +struct BotguardCfg { + program: OsString, + snapshot_file: PathBuf, } enum DefaultOpt { @@ -415,6 +423,13 @@ impl DefaultOpt { DefaultOpt::Default => Some(f()), } } + fn or_default_opt Option>(self, f: F) -> Option { + match self { + DefaultOpt::Some(x) => Some(x), + DefaultOpt::None => None, + DefaultOpt::Default => f(), + } + } } /// # RustyPipe query @@ -477,7 +492,7 @@ impl DefaultOpt { /// /// ## Options /// -/// You can set the language, country and visitor data cookie for individual requests. +/// You can set the language, country and visitor data ID for individual requests. 
/// /// ``` /// # use rustypipe::client::RustyPipe; @@ -626,6 +641,7 @@ impl RustyPipeBuilder { n_http_retries: 2, user_agent: None, storage_dir: None, + botguard_bin: DefaultOpt::Default, } } @@ -690,12 +706,25 @@ impl RustyPipeBuilder { let visitor_data_cache = VisitorDataCache::new(http.clone()); + let botguard_bin = self.botguard_bin.or_default_opt(|| { + let n = OsString::from("rustypipe-botguard"); + let out = std::process::Command::new(&n) + .arg("--version") + .output() + .ok()?; + if out.status.success() { + Some(n) + } else { + None + } + }); + Ok(RustyPipe { inner: Arc::new(RustyPipeRef { http, storage, reporter: self.reporter.or_default(|| { - let mut report_dir = storage_dir; + let mut report_dir = storage_dir.clone(); report_dir.push(DEFAULT_REPORT_DIR); Box::new(FileReporter::new(report_dir)) }), @@ -709,6 +738,14 @@ impl RustyPipeBuilder { default_opts: self.default_opts, user_agent, visitor_data_cache, + botguard: botguard_bin.map(|program| { + let mut snapshot_file = storage_dir; + snapshot_file.push("bg_snapshot.bin"); + BotguardCfg { + program, + snapshot_file, + } + }), }), }) } @@ -868,14 +905,14 @@ impl RustyPipeBuilder { self } - /// Set the YouTube visitor data cookie + /// Set the YouTube visitor data ID /// /// YouTube assigns a session cookie to each user which is used for personalized /// recommendations. By default, RustyPipe does not send this cookie to preserve /// user privacy. For requests that mandatate the cookie, a new one is requested /// for every query. /// - /// This option allows you to manually set the visitor data cookie of your client, + /// This option allows you to manually set the visitor data ID of your client, /// allowing you to get personalized recommendations or reproduce A/B tests. 
/// /// Note that YouTube has a rate limit on the number of requests from a single @@ -888,7 +925,7 @@ impl RustyPipeBuilder { self } - /// Set the YouTube visitor data cookie to an optional value + /// Set the YouTube visitor data ID to an optional value /// /// see also [`RustyPipeBuilder::visitor_data`] /// @@ -898,6 +935,26 @@ impl RustyPipeBuilder { self.default_opts.visitor_data = visitor_data.map(S::into); self } + + /// Disable RustyPipe Botguard + /// + /// By default, RustyPipe uses the `rustypipe-botguard` binary if it is available. If you want to + /// use RustyPipe without Botguard, you can disable it. + pub fn no_botguard(mut self) -> Self { + self.botguard_bin = DefaultOpt::None; + self + } + + /// Enable RustyPipe Botguard using the given binary + /// + /// Botguard is required to generate PO tokens for accessing streams on browser-based clients. + /// By default, RustyPipe uses the `rustypipe-botguard` binary if it is available. + /// + /// More information: + pub fn botguard_bin>(mut self, botguard_bin: S) -> Self { + self.botguard_bin = DefaultOpt::Some(botguard_bin.into()); + self + } } impl Default for RustyPipe { @@ -1191,17 +1248,6 @@ impl RustyPipe { } } - /// Request a new visitor data cookie from YouTube - /// - /// Since the cookie is shared between YT and YTM and the YTM page loads faster, - /// we request that. - /// - /// Sometimes YouTube does not set the `__Secure-YEC` cookie. In this case, the - /// visitor data is extracted from the html page. - async fn get_visitor_data(&self) -> Result { - self.inner.visitor_data_cache.new_visitor_data().await - } - /// Get a new device code for logging into YouTube pub async fn user_auth_get_code(&self) -> Result { tracing::debug!("getting OAuth user code"); @@ -1618,14 +1664,14 @@ impl RustyPipeQuery { self } - /// Set the YouTube visitor data cookie + /// Set the YouTube visitor data ID /// /// YouTube assigns a session cookie to each user which is used for personalized /// recommendations. 
By default, RustyPipe does not send this cookie to preserve /// user privacy. For requests that mandatate the cookie, a new one is requested /// for every query. /// - /// This option allows you to manually set the visitor data cookie of your query, + /// This option allows you to manually set the visitor data ID of your query, /// allowing you to get personalized recommendations or reproduce A/B tests. /// /// Note that YouTube has a rate limit on the number of requests from a single @@ -1636,7 +1682,7 @@ impl RustyPipeQuery { self } - /// Set the YouTube visitor data cookie to an optional value + /// Set the YouTube visitor data ID to an optional value /// /// see also [`RustyPipeQuery::visitor_data`] #[must_use] @@ -1845,7 +1891,7 @@ impl RustyPipeQuery { /// - `ctype`: Client type (`Desktop`, `DesktopMusic`, `Android`, ...) /// - `method`: HTTP method /// - `endpoint`: YouTube API endpoint (`https://www.youtube.com/youtubei/v1/?key=...`) - /// - `visitor_data`: YouTube visitor data cookie + /// - `visitor_data`: YouTube visitor data ID async fn request_builder( &self, ctype: ClientType, @@ -1987,14 +2033,75 @@ impl RustyPipeQuery { Some(format!("SAPISIDHASH {time_now}_{sapisidhash_hex}")) } - /// Get a YouTube visitor data cookie, which is necessary for certain requests - pub async fn get_visitor_data(&self) -> Result { + /// Get a YouTube visitor data ID, which is necessary for certain requests + pub async fn get_visitor_data(&self, force_new: bool) -> Result { + if force_new { + return self + .client + .inner + .visitor_data_cache + .new_visitor_data() + .await; + } + match &self.opts.visitor_data { Some(vd) => Ok(vd.clone()), - None => self.client.get_visitor_data().await, + None => self.client.inner.visitor_data_cache.get().await, } } + /// Remove a YouTube visitor data ID from the cache so it is not used again + pub fn remove_visitor_data(&self, visitor_data: &str) { + self.client.inner.visitor_data_cache.remove(visitor_data); + } + + /// Get PO tokens + 
async fn get_po_tokens(&self, idents: &[&str]) -> Result, Error> { + let bg = self + .client + .inner + .botguard + .as_ref() + .ok_or(Error::Extraction(ExtractionError::Botguard( + "not enabled".into(), + )))?; + let cmd = tokio::process::Command::new(&bg.program) + .arg("--snapshot-file") + .arg(&bg.snapshot_file) + .arg("--") + .args(idents) + .output() + .await + .map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?; + if !cmd.status.success() { + return Err(Error::Extraction(ExtractionError::Botguard( + String::from_utf8_lossy(&cmd.stderr).into_owned().into(), + ))); + } + + let output = String::from_utf8(cmd.stdout) + .map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?; + let tokens = output + .split_whitespace() + .take(idents.len()) + .map(str::to_owned) + .collect::>(); + if tokens.len() != idents.len() { + return Err(Error::Extraction(ExtractionError::Botguard( + "too few tokens returned".into(), + ))); + } + tracing::debug!("generated PO token"); + Ok(tokens) + } + + /// Get a PO token + pub async fn get_po_token>(self, ident: S) -> Result { + self.get_po_tokens(&[ident.as_ref()]) + .await + .map(|res| res.into_iter().next().unwrap()) + } + async fn yt_request_attempt + Debug, M>( &self, request: &Request, @@ -2128,6 +2235,7 @@ impl RustyPipeQuery { client_type: ctype, artist: ctx_src.artist, authenticated: self.opts.auth.unwrap_or_default(), + session_po_token: ctx_src.session_po_token, }; let request = self @@ -2284,6 +2392,7 @@ struct MapRespCtx<'a> { client_type: ClientType, artist: Option, authenticated: bool, + session_po_token: Option<&'a str>, } /// Options to give to the mapper when making requests; @@ -2294,6 +2403,7 @@ struct MapRespOptions<'a> { deobf: Option<&'a DeobfData>, artist: Option, unlocalized: bool, + session_po_token: Option<&'a str>, } #[allow(clippy::needless_lifetimes)] @@ -2309,6 +2419,7 @@ impl<'a> MapRespCtx<'a> { client_type: ClientType::Desktop, artist: None, 
authenticated: false, + session_po_token: None, } } } @@ -2370,11 +2481,23 @@ mod tests { #[tokio::test] async fn get_visitor_data() { let rp = RustyPipe::new(); - let visitor_data = rp.get_visitor_data().await.unwrap(); + let visitor_data = rp.query().get_visitor_data(true).await.unwrap(); assert!( visitor_data.starts_with("Cg") && visitor_data.len() > 23, "invalid visitor data: {visitor_data}" ); } + + #[tokio::test] + async fn get_po_token() { + let rp = RustyPipe::builder().build().unwrap(); + let ident = "Cgt4eDYyVVJveGQtbyiLyvu8BjIKCgJERRIEEgAgKw=="; + let po_token = rp.query().get_po_token(ident).await.unwrap(); + + let token_bts = data_encoding::BASE64URL + .decode(po_token.as_bytes()) + .unwrap(); + assert_eq!(token_bts.len(), ident.len() + 74); + } } diff --git a/src/client/player.rs b/src/client/player.rs index 2b39339..ab1cdc1 100644 --- a/src/client/player.rs +++ b/src/client/player.rs @@ -26,7 +26,6 @@ use super::{ player::{self, Format}, }, ClientType, MapRespCtx, MapRespOptions, MapResponse, MapResult, RustyPipeQuery, - DEFAULT_PLAYER_CLIENT_ORDER, }; #[derive(Debug, Serialize)] @@ -41,6 +40,9 @@ struct QPlayer<'a> { content_check_ok: bool, /// Probably refers to allowing sensitive content, too racy_check_ok: bool, + /// Botguard data + #[serde(skip_serializing_if = "Option::is_none")] + service_integrity_dimensions: Option, } #[derive(Debug, Serialize)] @@ -70,10 +72,16 @@ struct QDrmLicense<'a> { drm_video_feature: &'a str, } +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct ServiceIntegrity { + po_token: String, +} + impl RustyPipeQuery { /// Get YouTube player data (video/audio streams + basic metadata) pub async fn player + Debug>(&self, video_id: S) -> Result { - self.player_from_clients(video_id, DEFAULT_PLAYER_CLIENT_ORDER) + self.player_from_clients(video_id, self.player_client_order()) .await } @@ -142,28 +150,46 @@ impl RustyPipeQuery { client_type: ClientType, ) -> Result { let video_id = video_id.as_ref(); - let 
mut deobf = None; - let request_body = if client_type.needs_deobf() { - deobf = Some(self.client.get_deobf_data().await?); - QPlayer { - playback_context: Some(QPlaybackContext { - content_playback_context: QContentPlaybackContext { - signature_timestamp: &deobf.as_ref().unwrap().sts, - referer: format!("https://www.youtube.com/watch?v={video_id}"), - }, - }), - video_id, - content_check_ok: true, - racy_check_ok: true, - } - } else { - QPlayer { - playback_context: None, - video_id, - content_check_ok: true, - racy_check_ok: true, + let visitor_data = self.get_visitor_data(false).await?; + + let (deobf, (service_integrity_dimensions, session_po_token)) = tokio::try_join!( + async { + if client_type.needs_deobf() { + Ok::<_, Error>(Some(self.client.get_deobf_data().await?)) + } else { + Ok(None) + } + }, + async { + if client_type.needs_po_token() { + let mut po_tokens = self + .get_po_tokens(&[video_id, &visitor_data]) + .await? + .into_iter(); + let po_token = po_tokens.next().unwrap(); + let session_po_token = po_tokens.next().unwrap(); + + Ok((Some(ServiceIntegrity { po_token }), Some(session_po_token))) + } else { + Ok((None, None)) + } } + )?; + + let playback_context = deobf.as_ref().map(|deobf| QPlaybackContext { + content_playback_context: QContentPlaybackContext { + signature_timestamp: &deobf.sts, + referer: format!("https://www.youtube.com/watch?v={video_id}"), + }, + }); + + let request_body = QPlayer { + playback_context, + video_id, + content_check_ok: true, + racy_check_ok: true, + service_integrity_dimensions, }; self.execute_request_ctx::( @@ -173,14 +199,28 @@ impl RustyPipeQuery { "player", &request_body, MapRespOptions { + visitor_data: Some(&visitor_data), deobf: deobf.as_ref(), unlocalized: true, + session_po_token: session_po_token.as_deref(), ..Default::default() }, ) .await } + /// Get the default order of client types when fetching player data + /// + /// The order may change in the future in case YouTube applies changes to their + /// 
platform that disable a client or make it less reliable. + pub fn player_client_order(&self) -> &'static [ClientType] { + if self.client.inner.botguard.is_some() { + &[ClientType::Desktop, ClientType::Ios, ClientType::Tv] + } else { + &[ClientType::Ios, ClientType::Tv] + } + } + /// Get a license to play back DRM protected videos /// /// Requires authentication (either via OAuth or cookies). @@ -250,6 +290,7 @@ impl MapResponse for response::Player { "country" => Some(UnavailabilityReason::Geoblocked), "version" | "websites" => Some(UnavailabilityReason::UnsupportedClient), "bot" => Some(UnavailabilityReason::IpBan), + "later." => Some(UnavailabilityReason::TryAgain), _ => None, }) .unwrap_or_default(); @@ -327,7 +368,7 @@ impl MapResponse for response::Player { }; let streams = if !is_live { - let mut mapper = StreamsMapper::new(ctx.deobf)?; + let mut mapper = StreamsMapper::new(ctx.deobf, ctx.session_po_token)?; mapper.map_streams(streaming_data.formats); mapper.map_streams(streaming_data.adaptive_formats); let mut res = mapper.output()?; @@ -442,8 +483,9 @@ impl MapResponse for response::Player { } } -struct StreamsMapper { +struct StreamsMapper<'a> { deobf: Option, + session_po_token: Option<&'a str>, streams: Streams, warnings: Vec, /// First stream mapping error @@ -461,8 +503,11 @@ struct Streams { audio_streams: Vec, } -impl StreamsMapper { - fn new(deobf_data: Option<&DeobfData>) -> Result { +impl<'a> StreamsMapper<'a> { + fn new( + deobf_data: Option<&DeobfData>, + session_po_token: Option<&'a str>, + ) -> Result { let deobf = match deobf_data { Some(deobf_data) => Some(Deobfuscator::new(deobf_data)?), None => None, @@ -470,6 +515,7 @@ impl StreamsMapper { Ok(Self { deobf, + session_po_token, streams: Streams::default(), warnings: Vec::new(), first_err: None, @@ -609,6 +655,10 @@ impl StreamsMapper { }?; self.deobf_nsig(&mut url_params)?; + if let Some(pot) = self.session_po_token { + url_params.insert("pot".to_owned(), pot.to_owned()); + } + let url = 
Url::parse_with_params(url_base.as_str(), url_params.iter()) .map_err(|_| ExtractionError::InvalidData("could not combine URL".into()))?; @@ -880,6 +930,7 @@ mod tests { client_type, artist: None, authenticated: false, + session_po_token: None, }) .unwrap(); @@ -905,7 +956,7 @@ mod tests { #[test] fn cipher_to_url() { let signature_cipher = "s=w%3DAe%3DA6aDNQLkViKS7LOm9QtxZJHKwb53riq9qEFw-ecBWJCAiA%3DcEg0tn3dty9jEHszfzh4Ud__bg9CEHVx4ix-7dKsIPAhIQRw8JQ0qOA&sp=sig&url=https://rr5---sn-h0jelnez.googlevideo.com/videoplayback%3Fexpire%3D1659376413%26ei%3Dvb7nYvH5BMK8gAfBj7ToBQ%26ip%3D2003%253Ade%253Aaf06%253A6300%253Ac750%253A1b77%253Ac74a%253A80e3%26id%3Do-AB_BABwrXZJN428ZwDxq5ScPn2AbcGODnRlTVhCQ3mj2%26itag%3D251%26source%3Dyoutube%26requiressl%3Dyes%26mh%3DhH%26mm%3D31%252C26%26mn%3Dsn-h0jelnez%252Csn-4g5ednsl%26ms%3Dau%252Conr%26mv%3Dm%26mvi%3D5%26pl%3D37%26initcwndbps%3D1588750%26spc%3DlT-Khi831z8dTejFIRCvCEwx_6romtM%26vprv%3D1%26mime%3Daudio%252Fwebm%26ns%3Db_Mq_qlTFcSGlG9RpwpM9xQH%26gir%3Dyes%26clen%3D3781277%26dur%3D229.301%26lmt%3D1655510291473933%26mt%3D1659354538%26fvip%3D5%26keepalive%3Dyes%26fexp%3D24001373%252C24007246%26c%3DWEB%26rbqsm%3Dfr%26txp%3D4532434%26n%3Dd2g6G2hVqWIXxedQ%26sparams%3Dexpire%252Cei%252Cip%252Cid%252Citag%252Csource%252Crequiressl%252Cspc%252Cvprv%252Cmime%252Cns%252Cgir%252Cclen%252Cdur%252Clmt%26lsparams%3Dmh%252Cmm%252Cmn%252Cms%252Cmv%252Cmvi%252Cpl%252Cinitcwndbps%26lsig%3DAG3C_xAwRQIgCKCGJ1iu4wlaGXy3jcJyU3inh9dr1FIfqYOZEG_MdmACIQCbungkQYFk7EhD6K2YvLaHFMjKOFWjw001_tLb0lPDtg%253D%253D"; - let mut mapper = StreamsMapper::new(Some(&DEOBF_DATA)).unwrap(); + let mut mapper = StreamsMapper::new(Some(&DEOBF_DATA), None).unwrap(); let url = mapper .map_url(&None, &Some(signature_cipher.to_owned())) .unwrap() diff --git a/src/error.rs b/src/error.rs index 92460c6..b211904 100644 --- a/src/error.rs +++ b/src/error.rs @@ -58,6 +58,9 @@ pub enum ExtractionError { /// Error deobfuscating YouTube's URL signatures #[error("deobfuscation error: 
{0}")] Deobfuscation(Cow<'static, str>), + /// Error generating Botguard tokens + #[error("botguard error: {0}")] + Botguard(Cow<'static, str>), /// YouTube returned data that does not match the queried ID /// /// Specifically YouTube may return this video , @@ -102,6 +105,8 @@ pub enum UnavailabilityReason { OfflineLivestream, /// YouTube banned your IP address from accessing the platform without an account IpBan, + /// Video temporarily unavailable (rate limit) + TryAgain, /// Video cant be played for other reasons #[default] Unplayable, @@ -120,6 +125,7 @@ impl Display for UnavailabilityReason { UnavailabilityReason::MembersOnly => f.write_str("members-only"), UnavailabilityReason::OfflineLivestream => f.write_str("offline stream"), UnavailabilityReason::IpBan => f.write_str("ip-ban"), + UnavailabilityReason::TryAgain => f.write_str("try again"), UnavailabilityReason::Unplayable => f.write_str("unplayable"), } } @@ -220,7 +226,13 @@ impl Error { Ok(status) => status.is_server_error() || status == StatusCode::TOO_MANY_REQUESTS, Err(_) => false, }, - Self::Extraction(ExtractionError::InvalidData(_)) => true, + Self::Extraction( + ExtractionError::InvalidData(_) + | ExtractionError::Unavailable { + reason: UnavailabilityReason::TryAgain, + .. + }, + ) => true, _ => false, } } @@ -232,9 +244,10 @@ impl ExtractionError { matches!( self, ExtractionError::Unavailable { - reason: UnavailabilityReason::UnsupportedClient, + reason: UnavailabilityReason::UnsupportedClient | UnavailabilityReason::TryAgain, .. 
} | ExtractionError::WrongResult(_) + | ExtractionError::Botguard(_) ) } diff --git a/src/model/mod.rs b/src/model/mod.rs index e265595..94b05a6 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -149,7 +149,7 @@ pub struct VideoPlayer { pub drm: Option, /// Client type with which the player was fetched pub client_type: ClientType, - /// YouTube visitor data cookie + /// YouTube visitor data ID pub visitor_data: Option, } @@ -615,7 +615,7 @@ pub struct Playlist { pub last_update: Option, /// Textual last update date pub last_update_txt: Option, - /// YouTube visitor data cookie + /// YouTube visitor data ID pub visitor_data: Option, } @@ -683,7 +683,7 @@ pub struct VideoDetails { /// /// Is initially empty. pub latest_comments: Paginator, - /// YouTube visitor data cookie + /// YouTube visitor data ID pub visitor_data: Option, } @@ -820,7 +820,7 @@ pub struct Channel { pub has_shorts: bool, /// Does the channel have a *Live* tab? pub has_live: bool, - /// YouTube visitor data cookie + /// YouTube visitor data ID pub visitor_data: Option, /// Content fetched from the channel pub content: T, @@ -905,7 +905,7 @@ pub struct SearchResult { /// for the corrected search term and displays it on top of the /// search results page. 
pub corrected_query: Option, - /// YouTube visitor data cookie + /// YouTube visitor data ID pub visitor_data: Option, } diff --git a/src/util/visitor_data.rs b/src/util/visitor_data.rs index ac2bd13..4831b6a 100644 --- a/src/util/visitor_data.rs +++ b/src/util/visitor_data.rs @@ -6,7 +6,7 @@ use regex::Regex; use reqwest::{header, Client}; use crate::{ - client::YOUTUBE_MUSIC_HOME_URL, + client::{CONSENT_COOKIE, YOUTUBE_MUSIC_HOME_URL}, error::{Error, ExtractionError}, util, }; @@ -35,9 +35,9 @@ struct VisitorDataCacheRef { static VISITOR_DATA_REGEX: Lazy = Lazy::new(|| Regex::new(r#""visitorData":"([\w\d_\-%]+?)""#).unwrap()); /// Number of requests after which a new token is requested -const REQ_LIMIT: u32 = 10; -/// Maximum size of the cache (-1) -const MAX_SIZE: usize = 99; +const REQ_LIMIT: u32 = 50; +/// Maximum size of the cache +const MAX_SIZE: usize = 20; impl VisitorDataCache { pub fn new(http: Client) -> Self { @@ -59,6 +59,7 @@ impl VisitorDataCache { .get(YOUTUBE_MUSIC_HOME_URL) .header(header::ORIGIN, YOUTUBE_MUSIC_HOME_URL) .header(header::REFERER, YOUTUBE_MUSIC_HOME_URL) + .header(header::COOKIE, CONSENT_COOKIE) .send() .await?; @@ -100,10 +101,11 @@ impl VisitorDataCache { } pub async fn new_visitor_data(&self) -> Result { + let vd = self.get_visitor_data().await?; + self.inner .req_counter - .store(0, std::sync::atomic::Ordering::SeqCst); - let vd = self.get_visitor_data().await.unwrap(); + .store(0, std::sync::atomic::Ordering::Relaxed); let mut vds = self.inner.visitor_data.write().unwrap(); for _ in 0..(vds.len().saturating_sub(MAX_SIZE)) { let rem = vds.remove(0); @@ -119,9 +121,12 @@ impl VisitorDataCache { if self .inner .req_counter - .fetch_add(1, std::sync::atomic::Ordering::SeqCst) + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) >= REQ_LIMIT { + self.inner + .req_counter + .store(0, std::sync::atomic::Ordering::Relaxed); let nc = self.clone(); tokio::spawn(async move { nc.new_visitor_data().await }); } @@ -138,6 +143,14 @@ 
impl VisitorDataCache { // Fetch new visitor data if the cache is empty self.new_visitor_data().await } + + pub fn remove(&self, visitor_data: &str) { + let mut vds = self.inner.visitor_data.write().unwrap(); + if let Some(i) = vds.iter().position(|x| x == visitor_data) { + vds.remove(i); + tracing::debug!("visitor data {visitor_data} removed from cache"); + } + } } #[cfg(test)] diff --git a/tests/youtube.rs b/tests/youtube.rs index 05226af..5867637 100644 --- a/tests/youtube.rs +++ b/tests/youtube.rs @@ -139,11 +139,8 @@ async fn get_player_from_client(#[case] client_type: ClientType, rp: RustyPipe) assert_eq!(audio.format, AudioFormat::Webm); assert_eq!(audio.codec, AudioCodec::Opus); - // Desktop client now requires pot token so the streams cannot be tested here - if !matches!(client_type, ClientType::Desktop | ClientType::Mobile) { - check_video_stream(video).await; - check_video_stream(audio).await; - } + check_video_stream(video).await; + check_video_stream(audio).await; } assert!(player_data.expires_in_seconds > 10000);