feat: add support for rustypipe-botguard to get PO tokens

This commit is contained in:
ThetaDev 2025-02-03 02:41:17 +01:00
parent 92340056f8
commit b90a252a5e
No known key found for this signature in database
GPG key ID: E319D3C5148D65B6
16 changed files with 313 additions and 100 deletions

View file

@ -24,6 +24,13 @@ jobs:
with:
cache-on-failure: "true"
- name: Download rustypipe-botguard
  run: |
    TARGET=$(rustc --version --verbose | grep "host:" | sed -e 's/^host: //')
    # -f: fail on HTTP errors instead of saving the error page as the archive
    curl -fSsL -o rustypipe-botguard.gz "https://thetadev.de/dl/rustypipe-botguard-${TARGET}.gz"
    # The output redirection has to happen inside the privileged shell; with
    # `sudo gunzip ... > file` the file is opened by the unprivileged caller
    sudo sh -c 'gunzip -c rustypipe-botguard.gz > /usr/local/bin/rustypipe-botguard'
    sudo chmod +x /usr/local/bin/rustypipe-botguard
- name: 📎 Clippy
run: cargo clippy --all --tests --features=rss,indicatif,audiotag -- -D warnings

1
.gitignore vendored
View file

@ -5,3 +5,4 @@
rustypipe_reports
rustypipe_cache*.json
bg_snapshot.bin

View file

@ -99,7 +99,7 @@ fancy-regex.workspace = true
thiserror.workspace = true
url.workspace = true
reqwest = { workspace = true, features = ["json", "gzip", "brotli"] }
tokio = { workspace = true, features = ["macros", "time"] }
tokio = { workspace = true, features = ["macros", "time", "process"] }
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true

View file

@ -75,7 +75,7 @@ videos can be downloaded in parallel for improved performance.
## `vdata`: Get visitor data
You can use the vdata command to get a new visitor data cookie. This feature may come in
You can use the vdata command to get a new visitor data ID. This feature may come in
handy for testing and reproducing A/B tests.
## `releases` Get YouTube Music new releases
@ -130,7 +130,7 @@ Fetch a list of all the items saved in your YouTube/YouTube Music profile.
and `ALL_PROXY`
- **Logging:** You can change the log level with the `RUST_LOG` environment variable, it
is set to `info` by default
- **Visitor data:** A custom visitor data cookie can be used with the `--vdata` flag
- **Visitor data:** A custom visitor data ID can be used with the `--vdata` flag
- **Authentication:** Use the commands `rustypipe login` and `rustypipe login --cookie`
to log into your Google account using either OAuth or YouTube cookies. With the
`--auth` flag you can use authentication for any request.

View file

@ -19,8 +19,8 @@ use rustypipe::{
model::{
richtext::{RichText, ToPlaintext},
traits::YtEntity,
ArtistId, Comment, MusicSearchResult, TrackItem, TrackType, UrlTarget, Verification,
YouTubeItem,
ArtistId, AudioCodec, Comment, MusicSearchResult, TrackItem, TrackType, UrlTarget,
Verification, YouTubeItem,
},
param::{search_filter, ChannelVideoTab, Country, Language, StreamFilter},
report::FileReporter,
@ -45,7 +45,7 @@ struct Cli {
/// Always generate a report (used for debugging)
#[clap(long, global = true)]
report: bool,
/// YouTube visitor data cookie
/// YouTube visitor data ID
#[clap(long, global = true)]
vdata: Option<String>,
/// YouTube content language
@ -299,7 +299,7 @@ enum Commands {
#[clap(long)]
pretty: bool,
},
/// Get a YouTube visitor data cookie
/// Get a YouTube visitor data ID
Vdata,
/// Log in using your Google account
Login {
@ -925,7 +925,9 @@ async fn run() -> anyhow::Result<()> {
let mut filter = StreamFilter::new();
if let Some(res) = resolution {
if res == 0 {
filter = filter.no_video();
filter = filter
.no_video()
.audio_codecs([AudioCodec::Mp4a, AudioCodec::Opus]);
} else {
filter = filter.video_max_res(res);
}
@ -1716,7 +1718,7 @@ async fn run() -> anyhow::Result<()> {
}
}
Commands::Vdata => {
let vd = rp.query().get_visitor_data().await?;
let vd = rp.query().get_visitor_data(true).await?;
println!("{vd}");
}
Commands::Login {

View file

@ -94,7 +94,7 @@ pub async fn run_test(
let rp = rp.clone();
let pb = pb.clone();
async move {
let visitor_data = rp.query().get_visitor_data().await.unwrap();
let visitor_data = rp.query().get_visitor_data(true).await.unwrap();
let query = rp.query().visitor_data(&visitor_data);
let is_present = match ab {
ABTest::AttributedTextDescription => attributed_text_description(&query).await,

View file

@ -13,8 +13,8 @@ pub enum DownloadError {
#[error("http error: {0}")]
Http(#[from] reqwest::Error),
/// 403 error trying to download video
#[error("YouTube returned 403 error")]
Forbidden(ClientType),
#[error("YouTube returned 403 error; visitor_data={}", .1.as_deref().unwrap_or_default())]
Forbidden(ClientType, Option<String>),
/// File IO error
#[error(transparent)]
Io(#[from] std::io::Error),

View file

@ -21,7 +21,7 @@ use rand::Rng;
use regex::Regex;
use reqwest::{header, Client, StatusCode, Url};
use rustypipe::{
client::{ClientType, RustyPipe, DEFAULT_PLAYER_CLIENT_ORDER},
client::{ClientType, RustyPipe},
model::{
traits::{FileFormat, YtEntity},
AudioCodec, TrackItem, VideoCodec, VideoPlayer,
@ -698,9 +698,9 @@ impl DownloadQuery {
.await
{
Ok(res) => return Ok(res),
Err(DownloadError::Forbidden(c)) => {
Err(DownloadError::Forbidden(c, vd)) => {
failed_client = Some(c);
DownloadError::Forbidden(c)
DownloadError::Forbidden(c, vd)
}
Err(DownloadError::Http(e)) => {
if !e.is_timeout() {
@ -770,7 +770,7 @@ impl DownloadQuery {
.as_ref()
.or(self.dl.i.client_types.as_ref())
.map(Vec::as_slice)
.unwrap_or(DEFAULT_PLAYER_CLIENT_ORDER),
.unwrap_or(q.player_client_order()),
);
// If the last download failed, try another client if possible
@ -885,7 +885,14 @@ impl DownloadQuery {
.map_err(|e| {
if let DownloadError::Http(e) = &e {
if e.status() == Some(StatusCode::FORBIDDEN) {
return DownloadError::Forbidden(player_data.client_type);
// 403 errors may occur due to bad visitor data IDs
if let Some(vd) = &player_data.visitor_data {
q.remove_visitor_data(vd);
}
return DownloadError::Forbidden(
player_data.client_type,
player_data.visitor_data.clone(),
);
}
}
e
@ -1410,7 +1417,6 @@ async fn download_chunks_by_param(
));
}
tracing::debug!("Retrieving chunks...");
let mut stream = res.bytes_stream();
while let Some(item) = stream.next().await {
// Retrieve chunk.

View file

@ -3,12 +3,12 @@
When YouTube introduces a new feature, it does so gradually. When a user creates a new
session, YouTube decides randomly which new features should be enabled.
YouTube sessions are identified by the visitor data cookie. This cookie is sent with
YouTube sessions are identified by the visitor data ID. This ID is sent with
every API request using the `context.client.visitor_data` JSON parameter. It is also
returned in the `responseContext.visitorData` response parameter and stored as the
`__SECURE-YEC` cookie.
By sending the same visitor data cookie, A/B tests can be reproduced, which is important
By sending the same visitor data ID, A/B tests can be reproduced, which is important
for testing alternative YouTube clients.
This page lists all A/B tests that were encountered while maintaining the RustyPipe
@ -381,7 +381,7 @@ YouTube also changed the way the full discography page is fetched, surprisingly
it easier for alternative clients. The discography page now has its own content ID in
the format of `MPAD<channel id>` (Music Page Artist Discography). This page can be
fetched with a regular browse request without requiring parameters to be parsed or a
visitor data cookie to be set, as it was the case with the old system.
visitor data ID to be set, as was the case with the old system.
**OLD**

View file

@ -16,7 +16,7 @@ The pot token is base64-formatted and usually starts with a M
`MnToZ2brHmyo0ehfKtK_EWUq60dPYDXksNX_UsaniM_Uj6zbtiIZujCHY02hr7opxB_n3XHetJQCBV9cnNHovuhvDqrjfxsKR-sjn-eIxqv3qOZKphvyDpQzlYBnT2AXK41R-ti6iPonrvlvKIASNmYX2lhsEg==`
The token is generated from YouTubes Botguard script. The token is bound to the visitor data cookie
The token is generated from YouTube's Botguard script. The token is bound to the visitor data ID
used to fetch the player data.
This feature has been A/B-tested for a few weeks. During that time, refetching the player in case

View file

@ -25,6 +25,7 @@ mod video_details;
mod channel_rss;
use std::collections::HashMap;
use std::ffi::OsString;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use std::{borrow::Cow, fmt::Debug, time::Duration};
@ -97,6 +98,13 @@ impl ClientType {
fn needs_deobf(self) -> bool {
!matches!(self, ClientType::Ios)
}
/// Whether player requests made with this client type are sent with Botguard PO tokens
///
/// This is the case for the `Desktop`, `DesktopMusic` and `Mobile` clients.
fn needs_po_token(self) -> bool {
matches!(
self,
ClientType::Desktop | ClientType::DesktopMusic | ClientType::Mobile
)
}
}
/// YouTube context request parameter
@ -317,7 +325,7 @@ pub(crate) const DEFAULT_UA: &str = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit
pub(crate) const MOBILE_UA: &str = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.135 Mobile Safari/537.36";
pub(crate) const TV_UA: &str = "Mozilla/5.0 (SMART-TV; Linux; Tizen 5.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/5.0 NativeTVAds Safari/538.1";
const CONSENT_COOKIE: &str = "SOCS=CAISAiAD";
pub(crate) const CONSENT_COOKIE: &str = "SOCS=CAISAiAD";
const YOUTUBEI_V1_URL: &str = "https://www.youtube.com/youtubei/v1/";
const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1/";
@ -352,13 +360,6 @@ const OAUTH_SCOPES: &str = "http://gdata.youtube.com https://www.googleapis.com/
static CLIENT_VERSION_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#""INNERTUBE_CONTEXT_CLIENT_VERSION":"([\w\d\._-]+?)""#).unwrap());
/// Default order of client types when fetching player data
///
/// The order may change in the future in case YouTube applies changes to their
/// platform that disable a client or make it less reliable.
pub const DEFAULT_PLAYER_CLIENT_ORDER: &[ClientType] =
&[ClientType::Ios, ClientType::Tv, ClientType::Android];
/// The RustyPipe client used to access YouTube's API
///
/// RustyPipe uses an [`Arc`] internally, so if you are using the client
@ -378,6 +379,7 @@ struct RustyPipeRef {
default_opts: RustyPipeOpts,
user_agent: Cow<'static, str>,
visitor_data_cache: VisitorDataCache,
botguard: Option<BotguardCfg>,
}
#[derive(Clone)]
@ -399,6 +401,12 @@ pub struct RustyPipeBuilder {
user_agent: Option<String>,
default_opts: RustyPipeOpts,
storage_dir: Option<PathBuf>,
botguard_bin: DefaultOpt<OsString>,
}
/// Configuration for running the Botguard program that generates PO tokens
struct BotguardCfg {
/// Program that is executed to generate PO tokens
program: OsString,
/// File path handed to the program via its `--snapshot-file` argument
snapshot_file: PathBuf,
}
enum DefaultOpt<T> {
@ -415,6 +423,13 @@ impl<T> DefaultOpt<T> {
DefaultOpt::Default => Some(f()),
}
}
/// Resolve the option, falling back to `f` only if it was left at `Default`
///
/// An explicitly set value is returned as is, an explicit `None` stays `None`.
/// Unlike `or_default`, the fallback itself may decide that there is no value.
fn or_default_opt<F: FnOnce() -> Option<T>>(self, f: F) -> Option<T> {
    match self {
        DefaultOpt::Default => f(),
        DefaultOpt::Some(value) => Some(value),
        DefaultOpt::None => None,
    }
}
}
/// # RustyPipe query
@ -477,7 +492,7 @@ impl<T> DefaultOpt<T> {
///
/// ## Options
///
/// You can set the language, country and visitor data cookie for individual requests.
/// You can set the language, country and visitor data ID for individual requests.
///
/// ```
/// # use rustypipe::client::RustyPipe;
@ -626,6 +641,7 @@ impl RustyPipeBuilder {
n_http_retries: 2,
user_agent: None,
storage_dir: None,
botguard_bin: DefaultOpt::Default,
}
}
@ -690,12 +706,25 @@ impl RustyPipeBuilder {
let visitor_data_cache = VisitorDataCache::new(http.clone());
let botguard_bin = self.botguard_bin.or_default_opt(|| {
let n = OsString::from("rustypipe-botguard");
let out = std::process::Command::new(&n)
.arg("--version")
.output()
.ok()?;
if out.status.success() {
Some(n)
} else {
None
}
});
Ok(RustyPipe {
inner: Arc::new(RustyPipeRef {
http,
storage,
reporter: self.reporter.or_default(|| {
let mut report_dir = storage_dir;
let mut report_dir = storage_dir.clone();
report_dir.push(DEFAULT_REPORT_DIR);
Box::new(FileReporter::new(report_dir))
}),
@ -709,6 +738,14 @@ impl RustyPipeBuilder {
default_opts: self.default_opts,
user_agent,
visitor_data_cache,
botguard: botguard_bin.map(|program| {
let mut snapshot_file = storage_dir;
snapshot_file.push("bg_snapshot.bin");
BotguardCfg {
program,
snapshot_file,
}
}),
}),
})
}
@ -868,14 +905,14 @@ impl RustyPipeBuilder {
self
}
/// Set the YouTube visitor data cookie
/// Set the YouTube visitor data ID
///
/// YouTube assigns a session cookie to each user which is used for personalized
/// recommendations. By default, RustyPipe does not send this cookie to preserve
/// user privacy. For requests that mandate the cookie, a new one is requested
/// for every query.
///
/// This option allows you to manually set the visitor data cookie of your client,
/// This option allows you to manually set the visitor data ID of your client,
/// allowing you to get personalized recommendations or reproduce A/B tests.
///
/// Note that YouTube has a rate limit on the number of requests from a single
@ -888,7 +925,7 @@ impl RustyPipeBuilder {
self
}
/// Set the YouTube visitor data cookie to an optional value
/// Set the YouTube visitor data ID to an optional value
///
/// see also [`RustyPipeBuilder::visitor_data`]
///
@ -898,6 +935,26 @@ impl RustyPipeBuilder {
self.default_opts.visitor_data = visitor_data.map(S::into);
self
}
/// Disable RustyPipe Botguard
///
/// By default, RustyPipe uses the `rustypipe-botguard` binary if it is available. If you want to
/// use RustyPipe without Botguard, you can disable it.
///
/// Without Botguard no PO tokens can be generated, so the `Desktop` client is left out
/// of the default player client order.
#[must_use]
pub fn no_botguard(mut self) -> Self {
    self.botguard_bin = DefaultOpt::None;
    self
}
/// Enable RustyPipe Botguard using the given binary
///
/// Botguard is required to generate PO tokens for accessing streams on browser-based clients.
/// By default, RustyPipe uses the `rustypipe-botguard` binary if it is available.
///
/// A binary set with this option is used as given; the availability check
/// (`--version` call) is only performed for the default binary.
///
/// More information: <https://codeberg.org/ThetaDev/rustypipe-botguard>
#[must_use]
pub fn botguard_bin<S: Into<OsString>>(mut self, botguard_bin: S) -> Self {
    self.botguard_bin = DefaultOpt::Some(botguard_bin.into());
    self
}
}
impl Default for RustyPipe {
@ -1191,17 +1248,6 @@ impl RustyPipe {
}
}
/// Request a new visitor data cookie from YouTube
///
/// Since the cookie is shared between YT and YTM and the YTM page loads faster,
/// we request that.
///
/// Sometimes YouTube does not set the `__Secure-YEC` cookie. In this case, the
/// visitor data is extracted from the html page.
async fn get_visitor_data(&self) -> Result<String, Error> {
self.inner.visitor_data_cache.new_visitor_data().await
}
/// Get a new device code for logging into YouTube
pub async fn user_auth_get_code(&self) -> Result<OauthDeviceCode, Error> {
tracing::debug!("getting OAuth user code");
@ -1618,14 +1664,14 @@ impl RustyPipeQuery {
self
}
/// Set the YouTube visitor data cookie
/// Set the YouTube visitor data ID
///
/// YouTube assigns a session cookie to each user which is used for personalized
/// recommendations. By default, RustyPipe does not send this cookie to preserve
/// user privacy. For requests that mandatate the cookie, a new one is requested
/// for every query.
///
/// This option allows you to manually set the visitor data cookie of your query,
/// This option allows you to manually set the visitor data ID of your query,
/// allowing you to get personalized recommendations or reproduce A/B tests.
///
/// Note that YouTube has a rate limit on the number of requests from a single
@ -1636,7 +1682,7 @@ impl RustyPipeQuery {
self
}
/// Set the YouTube visitor data cookie to an optional value
/// Set the YouTube visitor data ID to an optional value
///
/// see also [`RustyPipeQuery::visitor_data`]
#[must_use]
@ -1845,7 +1891,7 @@ impl RustyPipeQuery {
/// - `ctype`: Client type (`Desktop`, `DesktopMusic`, `Android`, ...)
/// - `method`: HTTP method
/// - `endpoint`: YouTube API endpoint (`https://www.youtube.com/youtubei/v1/<XYZ>?key=...`)
/// - `visitor_data`: YouTube visitor data cookie
/// - `visitor_data`: YouTube visitor data ID
async fn request_builder(
&self,
ctype: ClientType,
@ -1987,14 +2033,75 @@ impl RustyPipeQuery {
Some(format!("SAPISIDHASH {time_now}_{sapisidhash_hex}"))
}
/// Get a YouTube visitor data cookie, which is necessary for certain requests
pub async fn get_visitor_data(&self) -> Result<String, Error> {
/// Get a YouTube visitor data ID, which is necessary for certain requests
pub async fn get_visitor_data(&self, force_new: bool) -> Result<String, Error> {
if force_new {
return self
.client
.inner
.visitor_data_cache
.new_visitor_data()
.await;
}
match &self.opts.visitor_data {
Some(vd) => Ok(vd.clone()),
None => self.client.get_visitor_data().await,
None => self.client.inner.visitor_data_cache.get().await,
}
}
/// Drop the given visitor data ID from the client's cache so that it is not
/// handed out again
///
/// Does nothing if the ID is not cached.
pub fn remove_visitor_data(&self, visitor_data: &str) {
    let cache = &self.client.inner.visitor_data_cache;
    cache.remove(visitor_data);
}
/// Get PO tokens
async fn get_po_tokens(&self, idents: &[&str]) -> Result<Vec<String>, Error> {
let bg = self
.client
.inner
.botguard
.as_ref()
.ok_or(Error::Extraction(ExtractionError::Botguard(
"not enabled".into(),
)))?;
let cmd = tokio::process::Command::new(&bg.program)
.arg("--snapshot-file")
.arg(&bg.snapshot_file)
.arg("--")
.args(idents)
.output()
.await
.map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?;
if !cmd.status.success() {
return Err(Error::Extraction(ExtractionError::Botguard(
String::from_utf8_lossy(&cmd.stderr).into_owned().into(),
)));
}
let output = String::from_utf8(cmd.stdout)
.map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?;
let tokens = output
.split_whitespace()
.take(idents.len())
.map(str::to_owned)
.collect::<Vec<_>>();
if tokens.len() != idents.len() {
return Err(Error::Extraction(ExtractionError::Botguard(
"too few tokens returned".into(),
)));
}
tracing::debug!("generated PO token");
Ok(tokens)
}
/// Generate a single PO token for the given identifier
///
/// Requires Botguard to be enabled, otherwise a Botguard extraction error is returned.
pub async fn get_po_token<S: AsRef<str>>(self, ident: S) -> Result<String, Error> {
    let tokens = self.get_po_tokens(&[ident.as_ref()]).await?;
    // `get_po_tokens` only succeeds if it got one token per identifier
    let token = tokens.into_iter().next().unwrap();
    Ok(token)
}
async fn yt_request_attempt<R: DeserializeOwned + MapResponse<M> + Debug, M>(
&self,
request: &Request,
@ -2128,6 +2235,7 @@ impl RustyPipeQuery {
client_type: ctype,
artist: ctx_src.artist,
authenticated: self.opts.auth.unwrap_or_default(),
session_po_token: ctx_src.session_po_token,
};
let request = self
@ -2284,6 +2392,7 @@ struct MapRespCtx<'a> {
client_type: ClientType,
artist: Option<ArtistId>,
authenticated: bool,
session_po_token: Option<&'a str>,
}
/// Options to give to the mapper when making requests;
@ -2294,6 +2403,7 @@ struct MapRespOptions<'a> {
deobf: Option<&'a DeobfData>,
artist: Option<ArtistId>,
unlocalized: bool,
session_po_token: Option<&'a str>,
}
#[allow(clippy::needless_lifetimes)]
@ -2309,6 +2419,7 @@ impl<'a> MapRespCtx<'a> {
client_type: ClientType::Desktop,
artist: None,
authenticated: false,
session_po_token: None,
}
}
}
@ -2370,11 +2481,23 @@ mod tests {
#[tokio::test]
async fn get_visitor_data() {
let rp = RustyPipe::new();
let visitor_data = rp.get_visitor_data().await.unwrap();
let visitor_data = rp.query().get_visitor_data(true).await.unwrap();
assert!(
visitor_data.starts_with("Cg") && visitor_data.len() > 23,
"invalid visitor data: {visitor_data}"
);
}
/// Generate a PO token for a fixed identifier and check its encoding and length
#[tokio::test]
async fn get_po_token() {
// Botguard is left at its default (auto-detected), so the `unwrap` on the token
// below panics if no working `rustypipe-botguard` binary is found
let rp = RustyPipe::builder().build().unwrap();
let ident = "Cgt4eDYyVVJveGQtbyiLyvu8BjIKCgJERRIEEgAgKw==";
let po_token = rp.query().get_po_token(ident).await.unwrap();
// The token has to be valid URL-safe base64
let token_bts = data_encoding::BASE64URL
.decode(po_token.as_bytes())
.unwrap();
// NOTE(review): the decoded token is expected to be 74 bytes longer than the
// identifier; where this constant comes from is not visible here - confirm
assert_eq!(token_bts.len(), ident.len() + 74);
}
}

View file

@ -26,7 +26,6 @@ use super::{
player::{self, Format},
},
ClientType, MapRespCtx, MapRespOptions, MapResponse, MapResult, RustyPipeQuery,
DEFAULT_PLAYER_CLIENT_ORDER,
};
#[derive(Debug, Serialize)]
@ -41,6 +40,9 @@ struct QPlayer<'a> {
content_check_ok: bool,
/// Probably refers to allowing sensitive content, too
racy_check_ok: bool,
/// Botguard data
#[serde(skip_serializing_if = "Option::is_none")]
service_integrity_dimensions: Option<ServiceIntegrity>,
}
#[derive(Debug, Serialize)]
@ -70,10 +72,16 @@ struct QDrmLicense<'a> {
drm_video_feature: &'a str,
}
/// Botguard data sent in the `service_integrity_dimensions` field of the player request
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct ServiceIntegrity {
/// PO token generated for the requested video ID
po_token: String,
}
impl RustyPipeQuery {
/// Get YouTube player data (video/audio streams + basic metadata)
pub async fn player<S: AsRef<str> + Debug>(&self, video_id: S) -> Result<VideoPlayer, Error> {
self.player_from_clients(video_id, DEFAULT_PLAYER_CLIENT_ORDER)
self.player_from_clients(video_id, self.player_client_order())
.await
}
@ -142,28 +150,46 @@ impl RustyPipeQuery {
client_type: ClientType,
) -> Result<VideoPlayer, Error> {
let video_id = video_id.as_ref();
let mut deobf = None;
let request_body = if client_type.needs_deobf() {
deobf = Some(self.client.get_deobf_data().await?);
QPlayer {
playback_context: Some(QPlaybackContext {
let visitor_data = self.get_visitor_data(false).await?;
let (deobf, (service_integrity_dimensions, session_po_token)) = tokio::try_join!(
async {
if client_type.needs_deobf() {
Ok::<_, Error>(Some(self.client.get_deobf_data().await?))
} else {
Ok(None)
}
},
async {
if client_type.needs_po_token() {
let mut po_tokens = self
.get_po_tokens(&[video_id, &visitor_data])
.await?
.into_iter();
let po_token = po_tokens.next().unwrap();
let session_po_token = po_tokens.next().unwrap();
Ok((Some(ServiceIntegrity { po_token }), Some(session_po_token)))
} else {
Ok((None, None))
}
}
)?;
let playback_context = deobf.as_ref().map(|deobf| QPlaybackContext {
content_playback_context: QContentPlaybackContext {
signature_timestamp: &deobf.as_ref().unwrap().sts,
signature_timestamp: &deobf.sts,
referer: format!("https://www.youtube.com/watch?v={video_id}"),
},
}),
});
let request_body = QPlayer {
playback_context,
video_id,
content_check_ok: true,
racy_check_ok: true,
}
} else {
QPlayer {
playback_context: None,
video_id,
content_check_ok: true,
racy_check_ok: true,
}
service_integrity_dimensions,
};
self.execute_request_ctx::<response::Player, _, _>(
@ -173,14 +199,28 @@ impl RustyPipeQuery {
"player",
&request_body,
MapRespOptions {
visitor_data: Some(&visitor_data),
deobf: deobf.as_ref(),
unlocalized: true,
session_po_token: session_po_token.as_deref(),
..Default::default()
},
)
.await
}
/// Get the default order of client types when fetching player data
///
/// The `Desktop` client is only part of the order (as first choice) if Botguard
/// is enabled for this client.
///
/// The order may change in the future in case YouTube applies changes to their
/// platform that disable a client or make it less reliable.
pub fn player_client_order(&self) -> &'static [ClientType] {
    const WITH_BOTGUARD: &[ClientType] =
        &[ClientType::Desktop, ClientType::Ios, ClientType::Tv];
    const WITHOUT_BOTGUARD: &[ClientType] = &[ClientType::Ios, ClientType::Tv];

    match self.client.inner.botguard {
        Some(_) => WITH_BOTGUARD,
        None => WITHOUT_BOTGUARD,
    }
}
/// Get a license to play back DRM protected videos
///
/// Requires authentication (either via OAuth or cookies).
@ -250,6 +290,7 @@ impl MapResponse<VideoPlayer> for response::Player {
"country" => Some(UnavailabilityReason::Geoblocked),
"version" | "websites" => Some(UnavailabilityReason::UnsupportedClient),
"bot" => Some(UnavailabilityReason::IpBan),
"later." => Some(UnavailabilityReason::TryAgain),
_ => None,
})
.unwrap_or_default();
@ -327,7 +368,7 @@ impl MapResponse<VideoPlayer> for response::Player {
};
let streams = if !is_live {
let mut mapper = StreamsMapper::new(ctx.deobf)?;
let mut mapper = StreamsMapper::new(ctx.deobf, ctx.session_po_token)?;
mapper.map_streams(streaming_data.formats);
mapper.map_streams(streaming_data.adaptive_formats);
let mut res = mapper.output()?;
@ -442,8 +483,9 @@ impl MapResponse<VideoPlayer> for response::Player {
}
}
struct StreamsMapper {
struct StreamsMapper<'a> {
deobf: Option<Deobfuscator>,
session_po_token: Option<&'a str>,
streams: Streams,
warnings: Vec<String>,
/// First stream mapping error
@ -461,8 +503,11 @@ struct Streams {
audio_streams: Vec<AudioStream>,
}
impl StreamsMapper {
fn new(deobf_data: Option<&DeobfData>) -> Result<Self, DeobfError> {
impl<'a> StreamsMapper<'a> {
fn new(
deobf_data: Option<&DeobfData>,
session_po_token: Option<&'a str>,
) -> Result<Self, DeobfError> {
let deobf = match deobf_data {
Some(deobf_data) => Some(Deobfuscator::new(deobf_data)?),
None => None,
@ -470,6 +515,7 @@ impl StreamsMapper {
Ok(Self {
deobf,
session_po_token,
streams: Streams::default(),
warnings: Vec::new(),
first_err: None,
@ -609,6 +655,10 @@ impl StreamsMapper {
}?;
self.deobf_nsig(&mut url_params)?;
if let Some(pot) = self.session_po_token {
url_params.insert("pot".to_owned(), pot.to_owned());
}
let url = Url::parse_with_params(url_base.as_str(), url_params.iter())
.map_err(|_| ExtractionError::InvalidData("could not combine URL".into()))?;
@ -880,6 +930,7 @@ mod tests {
client_type,
artist: None,
authenticated: false,
session_po_token: None,
})
.unwrap();
@ -905,7 +956,7 @@ mod tests {
#[test]
fn cipher_to_url() {
let signature_cipher = "s=w%3DAe%3DA6aDNQLkViKS7LOm9QtxZJHKwb53riq9qEFw-ecBWJCAiA%3DcEg0tn3dty9jEHszfzh4Ud__bg9CEHVx4ix-7dKsIPAhIQRw8JQ0qOA&sp=sig&url=https://rr5---sn-h0jelnez.googlevideo.com/videoplayback%3Fexpire%3D1659376413%26ei%3Dvb7nYvH5BMK8gAfBj7ToBQ%26ip%3D2003%253Ade%253Aaf06%253A6300%253Ac750%253A1b77%253Ac74a%253A80e3%26id%3Do-AB_BABwrXZJN428ZwDxq5ScPn2AbcGODnRlTVhCQ3mj2%26itag%3D251%26source%3Dyoutube%26requiressl%3Dyes%26mh%3DhH%26mm%3D31%252C26%26mn%3Dsn-h0jelnez%252Csn-4g5ednsl%26ms%3Dau%252Conr%26mv%3Dm%26mvi%3D5%26pl%3D37%26initcwndbps%3D1588750%26spc%3DlT-Khi831z8dTejFIRCvCEwx_6romtM%26vprv%3D1%26mime%3Daudio%252Fwebm%26ns%3Db_Mq_qlTFcSGlG9RpwpM9xQH%26gir%3Dyes%26clen%3D3781277%26dur%3D229.301%26lmt%3D1655510291473933%26mt%3D1659354538%26fvip%3D5%26keepalive%3Dyes%26fexp%3D24001373%252C24007246%26c%3DWEB%26rbqsm%3Dfr%26txp%3D4532434%26n%3Dd2g6G2hVqWIXxedQ%26sparams%3Dexpire%252Cei%252Cip%252Cid%252Citag%252Csource%252Crequiressl%252Cspc%252Cvprv%252Cmime%252Cns%252Cgir%252Cclen%252Cdur%252Clmt%26lsparams%3Dmh%252Cmm%252Cmn%252Cms%252Cmv%252Cmvi%252Cpl%252Cinitcwndbps%26lsig%3DAG3C_xAwRQIgCKCGJ1iu4wlaGXy3jcJyU3inh9dr1FIfqYOZEG_MdmACIQCbungkQYFk7EhD6K2YvLaHFMjKOFWjw001_tLb0lPDtg%253D%253D";
let mut mapper = StreamsMapper::new(Some(&DEOBF_DATA)).unwrap();
let mut mapper = StreamsMapper::new(Some(&DEOBF_DATA), None).unwrap();
let url = mapper
.map_url(&None, &Some(signature_cipher.to_owned()))
.unwrap()

View file

@ -58,6 +58,9 @@ pub enum ExtractionError {
/// Error deobfuscating YouTube's URL signatures
#[error("deobfuscation error: {0}")]
Deobfuscation(Cow<'static, str>),
/// Error generating Botguard tokens
#[error("botguard error: {0}")]
Botguard(Cow<'static, str>),
/// YouTube returned data that does not match the queried ID
///
/// Specifically YouTube may return this video <https://www.youtube.com/watch?v=aQvGIIdgFDM>,
@ -102,6 +105,8 @@ pub enum UnavailabilityReason {
OfflineLivestream,
/// YouTube banned your IP address from accessing the platform without an account
IpBan,
/// Video temporarily unavailable (rate limit)
TryAgain,
/// Video can't be played for other reasons
#[default]
Unplayable,
@ -120,6 +125,7 @@ impl Display for UnavailabilityReason {
UnavailabilityReason::MembersOnly => f.write_str("members-only"),
UnavailabilityReason::OfflineLivestream => f.write_str("offline stream"),
UnavailabilityReason::IpBan => f.write_str("ip-ban"),
UnavailabilityReason::TryAgain => f.write_str("try again"),
UnavailabilityReason::Unplayable => f.write_str("unplayable"),
}
}
@ -220,7 +226,13 @@ impl Error {
Ok(status) => status.is_server_error() || status == StatusCode::TOO_MANY_REQUESTS,
Err(_) => false,
},
Self::Extraction(ExtractionError::InvalidData(_)) => true,
Self::Extraction(
ExtractionError::InvalidData(_)
| ExtractionError::Unavailable {
reason: UnavailabilityReason::TryAgain,
..
},
) => true,
_ => false,
}
}
@ -232,9 +244,10 @@ impl ExtractionError {
matches!(
self,
ExtractionError::Unavailable {
reason: UnavailabilityReason::UnsupportedClient,
reason: UnavailabilityReason::UnsupportedClient | UnavailabilityReason::TryAgain,
..
} | ExtractionError::WrongResult(_)
| ExtractionError::Botguard(_)
)
}

View file

@ -149,7 +149,7 @@ pub struct VideoPlayer {
pub drm: Option<VideoPlayerDrm>,
/// Client type with which the player was fetched
pub client_type: ClientType,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
}
@ -615,7 +615,7 @@ pub struct Playlist {
pub last_update: Option<Date>,
/// Textual last update date
pub last_update_txt: Option<String>,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
}
@ -683,7 +683,7 @@ pub struct VideoDetails {
///
/// Is initially empty.
pub latest_comments: Paginator<Comment>,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
}
@ -820,7 +820,7 @@ pub struct Channel<T> {
pub has_shorts: bool,
/// Does the channel have a *Live* tab?
pub has_live: bool,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
/// Content fetched from the channel
pub content: T,
@ -905,7 +905,7 @@ pub struct SearchResult<T> {
/// for the corrected search term and displays it on top of the
/// search results page.
pub corrected_query: Option<String>,
/// YouTube visitor data cookie
/// YouTube visitor data ID
pub visitor_data: Option<String>,
}

View file

@ -6,7 +6,7 @@ use regex::Regex;
use reqwest::{header, Client};
use crate::{
client::YOUTUBE_MUSIC_HOME_URL,
client::{CONSENT_COOKIE, YOUTUBE_MUSIC_HOME_URL},
error::{Error, ExtractionError},
util,
};
@ -35,9 +35,9 @@ struct VisitorDataCacheRef {
static VISITOR_DATA_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#""visitorData":"([\w\d_\-%]+?)""#).unwrap());
/// Number of requests after which a new token is requested
const REQ_LIMIT: u32 = 10;
/// Maximum size of the cache (-1)
const MAX_SIZE: usize = 99;
const REQ_LIMIT: u32 = 50;
/// Maximum size of the cache
const MAX_SIZE: usize = 20;
impl VisitorDataCache {
pub fn new(http: Client) -> Self {
@ -59,6 +59,7 @@ impl VisitorDataCache {
.get(YOUTUBE_MUSIC_HOME_URL)
.header(header::ORIGIN, YOUTUBE_MUSIC_HOME_URL)
.header(header::REFERER, YOUTUBE_MUSIC_HOME_URL)
.header(header::COOKIE, CONSENT_COOKIE)
.send()
.await?;
@ -100,10 +101,11 @@ impl VisitorDataCache {
}
pub async fn new_visitor_data(&self) -> Result<String, Error> {
let vd = self.get_visitor_data().await.unwrap();
self.inner
.req_counter
.store(0, std::sync::atomic::Ordering::SeqCst);
let vd = self.get_visitor_data().await.unwrap();
.store(0, std::sync::atomic::Ordering::Relaxed);
let mut vds = self.inner.visitor_data.write().unwrap();
for _ in 0..(vds.len().saturating_sub(MAX_SIZE)) {
let rem = vds.remove(0);
@ -119,9 +121,12 @@ impl VisitorDataCache {
if self
.inner
.req_counter
.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
>= REQ_LIMIT
{
self.inner
.req_counter
.store(0, std::sync::atomic::Ordering::Relaxed);
let nc = self.clone();
tokio::spawn(async move { nc.new_visitor_data().await });
}
@ -138,6 +143,14 @@ impl VisitorDataCache {
// Fetch new visitor data if the cache is empty
self.new_visitor_data().await
}
/// Remove the first cached entry equal to `visitor_data`
///
/// Does nothing if the ID is not in the cache.
pub fn remove(&self, visitor_data: &str) {
    let mut cached = self.inner.visitor_data.write().unwrap();
    let found = cached.iter().position(|x| x == visitor_data);
    match found {
        Some(idx) => {
            cached.remove(idx);
            tracing::debug!("visitor data {visitor_data} removed from cache");
        }
        None => {}
    }
}
}
#[cfg(test)]

View file

@ -139,12 +139,9 @@ async fn get_player_from_client(#[case] client_type: ClientType, rp: RustyPipe)
assert_eq!(audio.format, AudioFormat::Webm);
assert_eq!(audio.codec, AudioCodec::Opus);
// Desktop client now requires pot token so the streams cannot be tested here
if !matches!(client_type, ClientType::Desktop | ClientType::Mobile) {
check_video_stream(video).await;
check_video_stream(audio).await;
}
}
assert!(player_data.expires_in_seconds > 10000);
}