feat: add potoken option to downloader

This commit is contained in:
ThetaDev 2024-08-09 21:41:47 +02:00
parent d36ba595da
commit 904f8215d8
No known key found for this signature in database
GPG key ID: E319D3C5148D65B6
3 changed files with 99 additions and 2 deletions

View file

@ -103,6 +103,9 @@ enum Commands {
/// YT Client used to fetch player data
#[clap(long)]
client_type: Option<PlayerType>,
/// Pot token to circumvent bot detection
#[clap(long)]
pot: Option<String>,
},
/// Extract video, playlist, album or channel data
Get {
@ -536,6 +539,7 @@ async fn main() {
music,
limit,
client_type,
pot,
} => {
let url_target = rp.query().resolve_string(&id, false).await.unwrap();
@ -555,6 +559,9 @@ async fn main() {
dl = dl.audio_tag().crop_cover();
filter = filter.no_video();
}
if let Some(pot) = pot {
dl = dl.pot(pot);
}
let dl = dl.stream_filter(filter).build();
match url_target {

View file

@ -17,7 +17,7 @@ use futures::stream::{self, StreamExt};
use once_cell::sync::Lazy;
use rand::Rng;
use regex::Regex;
use reqwest::{header, Client, StatusCode};
use reqwest::{header, Client, StatusCode, Url};
use rustypipe::{
client::{ClientType, RustyPipe},
model::{
@ -74,6 +74,7 @@ pub struct DownloaderBuilder {
audio_tag: bool,
#[cfg(feature = "audiotag")]
crop_cover: bool,
pot: Option<String>,
}
struct DownloaderInner {
@ -103,6 +104,8 @@ struct DownloaderInner {
/// Crop YT thumbnails to ensure square album covers
#[cfg(feature = "audiotag")]
crop_cover: bool,
/// Pot token to circumvent bot detection
pot: Option<String>,
}
/// Download query
@ -122,6 +125,8 @@ pub struct DownloadQuery {
video_format: Option<DownloadVideoFormat>,
/// ClientType type for fetching videos
client_type: Option<ClientType>,
/// Pot token to circumvent bot detection
pot: Option<String>,
}
/// Video to be downloaded
@ -287,6 +292,7 @@ impl Default for DownloaderBuilder {
audio_tag: false,
#[cfg(feature = "audiotag")]
crop_cover: false,
pot: None,
}
}
}
@ -384,6 +390,21 @@ impl DownloaderBuilder {
self
}
/// Configure the default `pot` token sent with stream download requests
///
/// The `pot` token is YouTube's measure against third-party clients downloading videos.
/// It is produced by YouTube's botguard, so obtaining one requires a full browser environment.
///
/// A script for extracting the token is provided by the Invidious project:
/// <https://github.com/iv-org/youtube-trusted-session-generator>
///
/// The token is only applied when the player data was fetched with the
/// [`ClientType::Desktop`] or [`ClientType::DesktopMusic`] client. A token set on an
/// individual download query takes precedence over the one configured here.
#[must_use]
pub fn pot<S>(mut self, pot: S) -> Self
where
    S: Into<String>,
{
    let token: String = pot.into();
    self.pot = Some(token);
    self
}
/// Create a new, configured [`Downloader`] instance
pub fn build(self) -> Downloader {
self.build_with_client(
@ -417,6 +438,7 @@ impl DownloaderBuilder {
audio_tag: self.audio_tag,
#[cfg(feature = "audiotag")]
crop_cover: self.crop_cover,
pot: self.pot,
}),
}
}
@ -451,6 +473,7 @@ impl Downloader {
filter: None,
video_format: None,
client_type: None,
pot: None,
}
}
@ -590,6 +613,21 @@ impl DownloadQuery {
self
}
/// Configure the `pot` token sent with the stream download requests of this query
///
/// The `pot` token is YouTube's measure against third-party clients downloading videos.
/// It is produced by YouTube's botguard, so obtaining one requires a full browser environment.
///
/// A script for extracting the token is provided by the Invidious project:
/// <https://github.com/iv-org/youtube-trusted-session-generator>
///
/// The token is only applied when the player data was fetched with the
/// [`ClientType::Desktop`] or [`ClientType::DesktopMusic`] client. If set, it is used
/// instead of the token configured on the downloader.
#[must_use]
pub fn pot<S>(mut self, pot: S) -> Self
where
    S: Into<String>,
{
    let token: String = pot.into();
    self.pot = Some(token);
    self
}
/// Download the video
///
/// If no download path is set, the video is downloaded to the current directory
@ -685,6 +723,14 @@ impl DownloadQuery {
None => q.player(&self.video.id).await?,
};
let user_agent = q.user_agent(player_data.client_type);
let pot = if matches!(
player_data.client_type,
ClientType::Desktop | ClientType::DesktopMusic
) {
self.pot.as_deref().or(self.dl.i.pot.as_deref())
} else {
None
};
// Select streams to download
let (video, audio) = player_data.select_video_audio_stream(filter);
@ -762,6 +808,7 @@ impl DownloadQuery {
&downloads,
&self.dl.i.http,
&user_agent,
pot,
#[cfg(feature = "indicatif")]
pb.clone(),
)
@ -1006,6 +1053,7 @@ async fn download_single_file(
output: &Path,
http: &Client,
user_agent: &str,
pot: Option<&str>,
#[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<()> {
// Check if file is already downloaded
@ -1102,6 +1150,7 @@ async fn download_single_file(
size.unwrap(),
offset,
user_agent,
pot,
#[cfg(feature = "indicatif")]
pb,
)
@ -1209,6 +1258,7 @@ async fn download_chunks_by_header(
// Use the `range` url parameter to download a stream in chunks.
// This is used by YouTube's web player. The file size
// must be known beforehand (it is included in the stream url).
#[allow(clippy::too_many_arguments)]
async fn download_chunks_by_param(
http: &Client,
file: &mut File,
@ -1216,6 +1266,7 @@ async fn download_chunks_by_param(
size: u64,
offset: u64,
user_agent: &str,
pot: Option<&str>,
#[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<()> {
let mut offset = offset;
@ -1228,8 +1279,15 @@ async fn download_chunks_by_param(
let range = get_download_range(offset, Some(size));
tracing::debug!("Fetching range {}-{}", range.start, range.end);
let mut urlp =
Url::parse_with_params(url, [("range", &format!("{}-{}", range.start, range.end))])
.map_err(|e| DownloadError::Progressive(format!("url parsing: {e}").into()))?;
if let Some(pot) = pot {
urlp.query_pairs_mut().append_pair("pot", pot);
}
let res = http
.get(format!("{}&range={}-{}", url, range.start, range.end))
.get(urlp)
.header(header::USER_AGENT, user_agent)
.header(header::ORIGIN, "https://www.youtube.com")
.header(header::REFERER, "https://www.youtube.com/")
@ -1277,6 +1335,7 @@ async fn download_streams(
downloads: &Vec<StreamDownload>,
http: &Client,
user_agent: &str,
pot: Option<&str>,
#[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<()> {
let n = downloads.len();
@ -1288,6 +1347,7 @@ async fn download_streams(
&d.file,
http,
user_agent,
pot,
#[cfg(feature = "indicatif")]
pb.clone(),
)

30
notes/po_token.md Normal file
View file

@ -0,0 +1,30 @@
# About the new `pot` token
YouTube has implemented a new method to prevent downloaders and alternative clients from accessing
their videos. Now requests to YouTube's video servers require a `pot` URL parameter.
It is currently only required in the web player. The YTM and embedded players send the token, too, but do not require it (this may change in the future).
The TV player does not use the token at all and is currently the best workaround. The only downside
is that the TV player does not return any video metadata like title and description text.
The first part of a video file (range: 0-1007959 bytes) can be downloaded without the token.
Requesting more of the file requires the pot token to be set, otherwise YouTube responds with a 403
error.
The pot token is base64-formatted and usually starts with an `M`, for example:
`MnToZ2brHmyo0ehfKtK_EWUq60dPYDXksNX_UsaniM_Uj6zbtiIZujCHY02hr7opxB_n3XHetJQCBV9cnNHovuhvDqrjfxsKR-sjn-eIxqv3qOZKphvyDpQzlYBnT2AXK41R-ti6iPonrvlvKIASNmYX2lhsEg==`
The token is generated by YouTube's Botguard script. The token is bound to the visitor data cookie
used to fetch the player data.
This feature has been A/B-tested for a few weeks. During that time, refetching the player in case
of a 403 download error often made things work again. As of 08.08.2024 this new feature seems to be
stabilized and retrying requests does not work any more.
## Getting a `pot` token
You need a real browser environment to run YouTube's botguard and obtain a pot token. The Invidious project has created a script to extract this token:
<https://github.com/iv-org/youtube-trusted-session-generator/tree/master>.
The script opens YouTube's embedded video player, starts playback and extracts the visitor data