From 904f8215d84c810b04e4d2134718e786a4803ad2 Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Fri, 9 Aug 2024 21:41:47 +0200 Subject: [PATCH] feat: add potoken option to downloader --- cli/src/main.rs | 7 +++++ downloader/src/lib.rs | 64 +++++++++++++++++++++++++++++++++++++++++-- notes/po_token.md | 30 ++++++++++++++++++++ 3 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 notes/po_token.md diff --git a/cli/src/main.rs b/cli/src/main.rs index 6a954a9..35107a9 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -103,6 +103,9 @@ enum Commands { /// YT Client used to fetch player data #[clap(long)] client_type: Option, + /// Pot token to circumvent bot detection + #[clap(long)] + pot: Option, }, /// Extract video, playlist, album or channel data Get { @@ -536,6 +539,7 @@ async fn main() { music, limit, client_type, + pot, } => { let url_target = rp.query().resolve_string(&id, false).await.unwrap(); @@ -555,6 +559,9 @@ async fn main() { dl = dl.audio_tag().crop_cover(); filter = filter.no_video(); } + if let Some(pot) = pot { + dl = dl.pot(pot); + } let dl = dl.stream_filter(filter).build(); match url_target { diff --git a/downloader/src/lib.rs b/downloader/src/lib.rs index 40c2583..d8e22c8 100644 --- a/downloader/src/lib.rs +++ b/downloader/src/lib.rs @@ -17,7 +17,7 @@ use futures::stream::{self, StreamExt}; use once_cell::sync::Lazy; use rand::Rng; use regex::Regex; -use reqwest::{header, Client, StatusCode}; +use reqwest::{header, Client, StatusCode, Url}; use rustypipe::{ client::{ClientType, RustyPipe}, model::{ @@ -74,6 +74,7 @@ pub struct DownloaderBuilder { audio_tag: bool, #[cfg(feature = "audiotag")] crop_cover: bool, + pot: Option, } struct DownloaderInner { @@ -103,6 +104,8 @@ struct DownloaderInner { /// Crop YT thumbnails to ensure square album covers #[cfg(feature = "audiotag")] crop_cover: bool, + /// Pot token to circumvent bot detection + pot: Option, } /// Download query @@ -122,6 +125,8 @@ pub struct DownloadQuery { video_format: 
Option, /// ClientType type for fetching videos client_type: Option, + /// Pot token to circumvent bot detection + pot: Option, } /// Video to be downloaded @@ -287,6 +292,7 @@ impl Default for DownloaderBuilder { audio_tag: false, #[cfg(feature = "audiotag")] crop_cover: false, + pot: None, } } } @@ -384,6 +390,21 @@ impl DownloaderBuilder { self } + /// Set the `pot` token to circumvent bot detection + /// + /// YouTube has implemented the token to prevent other clients from downloading YouTube videos. + /// The token is generated using YouTube's botguard. Therefore you need a full browser environment + /// to obtain one. + /// + /// The Invidious project has created a script to extract this token: + /// + /// The `pot` token is only used for the [`ClientType::Desktop`] and [`ClientType::DesktopMusic`] clients. + #[must_use] + pub fn pot>(mut self, pot: S) -> Self { + self.pot = Some(pot.into()); + self + } + /// Create a new, configured [`Downloader`] instance pub fn build(self) -> Downloader { self.build_with_client( @@ -417,6 +438,7 @@ impl DownloaderBuilder { audio_tag: self.audio_tag, #[cfg(feature = "audiotag")] crop_cover: self.crop_cover, + pot: self.pot, }), } } @@ -451,6 +473,7 @@ impl Downloader { filter: None, video_format: None, client_type: None, + pot: None, } } @@ -590,6 +613,21 @@ impl DownloadQuery { self } + /// Set the `pot` token to circumvent bot detection + /// + /// YouTube has implemented the token to prevent other clients from downloading YouTube videos. + /// The token is generated using YouTube's botguard. Therefore you need a full browser environment + /// to obtain one. + /// + /// The Invidious project has created a script to extract this token: + /// + /// The `pot` token is only used for the [`ClientType::Desktop`] and [`ClientType::DesktopMusic`] clients. 
+ #[must_use] + pub fn pot>(mut self, pot: S) -> Self { + self.pot = Some(pot.into()); + self + } + /// Download the video /// /// If no download path is set, the video is downloaded to the current directory @@ -685,6 +723,14 @@ impl DownloadQuery { None => q.player(&self.video.id).await?, }; let user_agent = q.user_agent(player_data.client_type); + let pot = if matches!( + player_data.client_type, + ClientType::Desktop | ClientType::DesktopMusic + ) { + self.pot.as_deref().or(self.dl.i.pot.as_deref()) + } else { + None + }; // Select streams to download let (video, audio) = player_data.select_video_audio_stream(filter); @@ -762,6 +808,7 @@ impl DownloadQuery { &downloads, &self.dl.i.http, &user_agent, + pot, #[cfg(feature = "indicatif")] pb.clone(), ) @@ -1006,6 +1053,7 @@ async fn download_single_file( output: &Path, http: &Client, user_agent: &str, + pot: Option<&str>, #[cfg(feature = "indicatif")] pb: Option, ) -> Result<()> { // Check if file is already downloaded @@ -1102,6 +1150,7 @@ async fn download_single_file( size.unwrap(), offset, user_agent, + pot, #[cfg(feature = "indicatif")] pb, ) @@ -1209,6 +1258,7 @@ async fn download_chunks_by_header( // Use the `range` url parameter to download a stream in chunks. // This ist used by YouTube's web player. The file size // must be known beforehand (it is included in the stream url). 
+#[allow(clippy::too_many_arguments)] async fn download_chunks_by_param( http: &Client, file: &mut File, @@ -1216,6 +1266,7 @@ async fn download_chunks_by_param( size: u64, offset: u64, user_agent: &str, + pot: Option<&str>, #[cfg(feature = "indicatif")] pb: Option, ) -> Result<()> { let mut offset = offset; @@ -1228,8 +1279,15 @@ async fn download_chunks_by_param( let range = get_download_range(offset, Some(size)); tracing::debug!("Fetching range {}-{}", range.start, range.end); + let mut urlp = + Url::parse_with_params(url, [("range", &format!("{}-{}", range.start, range.end))]) + .map_err(|e| DownloadError::Progressive(format!("url parsing: {e}").into()))?; + if let Some(pot) = pot { + urlp.query_pairs_mut().append_pair("pot", pot); + } + let res = http - .get(format!("{}&range={}-{}", url, range.start, range.end)) + .get(urlp) .header(header::USER_AGENT, user_agent) .header(header::ORIGIN, "https://www.youtube.com") .header(header::REFERER, "https://www.youtube.com/") @@ -1277,6 +1335,7 @@ async fn download_streams( downloads: &Vec, http: &Client, user_agent: &str, + pot: Option<&str>, #[cfg(feature = "indicatif")] pb: Option, ) -> Result<()> { let n = downloads.len(); @@ -1288,6 +1347,7 @@ async fn download_streams( &d.file, http, user_agent, + pot, #[cfg(feature = "indicatif")] pb.clone(), ) diff --git a/notes/po_token.md b/notes/po_token.md new file mode 100644 index 0000000..26064e6 --- /dev/null +++ b/notes/po_token.md @@ -0,0 +1,30 @@ +# About the new `pot` token + +YouTube has implemented a new method to prevent downloaders and alternative clients from accessing +their videos. Now requests to YouTube's video servers require a `pot` URL parameter. + +It is currently only required in the web player. The YTM and embedded player sends the token, too, but does not require it (this may change in the future). + +The TV player does not use the token at all and is currently the best workaround. 
The only downside +is that the TV player does not return any video metadata like title and description text. + +The first part of a video file (range: 0-1007959 bytes) can be downloaded without the token. +Requesting more of the file requires the pot token to be set; otherwise YouTube responds with a 403 +error. + +The pot token is base64-encoded and usually starts with an `M`: + +`MnToZ2brHmyo0ehfKtK_EWUq60dPYDXksNX_UsaniM_Uj6zbtiIZujCHY02hr7opxB_n3XHetJQCBV9cnNHovuhvDqrjfxsKR-sjn-eIxqv3qOZKphvyDpQzlYBnT2AXK41R-ti6iPonrvlvKIASNmYX2lhsEg==` + +The token is generated by YouTube's Botguard script. The token is bound to the visitor data cookie +used to fetch the player data. + +This feature has been A/B-tested for a few weeks. During that time, refetching the player in case +of a 403 download error often made things work again. As of 2024-08-08 this new feature seems to be +stabilized and retrying requests does not work any more. + +## Getting a `pot` token + +You need a real browser environment to run YouTube's Botguard and obtain a pot token. The Invidious project has created a script to +extract this token. +The script opens YouTube's embedded video player, starts playback and extracts the visitor data