feat: add session po token cache
This commit is contained in:
parent
29c854b20d
commit
b72b501b6d
15 changed files with 234 additions and 61 deletions
|
|
@ -70,6 +70,9 @@ struct Cli {
|
|||
/// Disable Botguard
|
||||
#[clap(long, global = true)]
|
||||
no_botguard: bool,
|
||||
/// Enable caching for session-bound PO tokens
|
||||
#[clap(long, global = true)]
|
||||
po_token_cache: bool,
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
|
|
@ -913,6 +916,9 @@ async fn run() -> anyhow::Result<()> {
|
|||
if cli.no_botguard {
|
||||
rp = rp.no_botguard();
|
||||
}
|
||||
if cli.po_token_cache {
|
||||
rp = rp.po_token_cache();
|
||||
}
|
||||
if cli.auth {
|
||||
rp = rp.authenticated();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -403,11 +403,22 @@ pub struct RustyPipeBuilder {
|
|||
default_opts: RustyPipeOpts,
|
||||
storage_dir: Option<PathBuf>,
|
||||
botguard_bin: DefaultOpt<OsString>,
|
||||
po_token_cache: bool,
|
||||
}
|
||||
|
||||
/// Botguard settings resolved when the client is built
struct BotguardCfg {
|
||||
/// Botguard executable that is spawned to generate PO tokens
program: OsString,
|
||||
/// Snapshot file for the Botguard binary
/// (NOTE(review): presumably the value passed after `--snapshot-file` — confirm)
snapshot_file: PathBuf,
|
||||
/// Whether session-bound PO tokens are cached and reused between player requests
po_token_cache: bool,
|
||||
}
|
||||
|
||||
/// Proof-of-origin (PO) token together with its expiry date
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct PoToken {
|
||||
/// PO token value (base64url-encoded string)
|
||||
pub po_token: String,
|
||||
/// Date until which the token is valid
///
/// Set to 12 hours after generation if Botguard does not report a `valid_until` value.
|
||||
pub valid_until: OffsetDateTime,
|
||||
}
|
||||
|
||||
enum DefaultOpt<T> {
|
||||
|
|
@ -643,6 +654,7 @@ impl RustyPipeBuilder {
|
|||
user_agent: None,
|
||||
storage_dir: None,
|
||||
botguard_bin: DefaultOpt::Default,
|
||||
po_token_cache: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -705,7 +717,7 @@ impl RustyPipeBuilder {
|
|||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
let visitor_data_cache = VisitorDataCache::new(http.clone());
|
||||
let visitor_data_cache = VisitorDataCache::new(http.clone(), 50, 20);
|
||||
|
||||
let botguard_bin = self.botguard_bin.or_default_opt(|| {
|
||||
let n = OsString::from("rustypipe-botguard");
|
||||
|
|
@ -745,6 +757,7 @@ impl RustyPipeBuilder {
|
|||
BotguardCfg {
|
||||
program,
|
||||
snapshot_file,
|
||||
po_token_cache: self.po_token_cache,
|
||||
}
|
||||
}),
|
||||
}),
|
||||
|
|
@ -941,6 +954,7 @@ impl RustyPipeBuilder {
|
|||
///
|
||||
/// By default, RustyPipe uses the `rustypipe-botguard` binary if it is available. If you want to
|
||||
/// use RustyPipe without Botguard, you can disable it.
|
||||
#[must_use]
|
||||
pub fn no_botguard(mut self) -> Self {
|
||||
self.botguard_bin = DefaultOpt::None;
|
||||
self
|
||||
|
|
@ -952,10 +966,24 @@ impl RustyPipeBuilder {
|
|||
/// By default, RustyPipe uses the `rustypipe-botguard` binary if it is available.
|
||||
///
|
||||
/// More information: <https://codeberg.org/ThetaDev/rustypipe-botguard>
|
||||
#[must_use]
|
||||
pub fn botguard_bin<S: Into<OsString>>(mut self, botguard_bin: S) -> Self {
|
||||
self.botguard_bin = DefaultOpt::Some(botguard_bin.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable caching for session-bound PO tokens
|
||||
///
|
||||
/// By default, RustyPipe calls Botguard for every player request to fetch both a
|
||||
/// content-bound and a session-bound PO token.
|
||||
///
|
||||
/// With caching enabled, the session-bound PO tokens are stored and reused.
|
||||
/// Content-bound PO tokens are not used (they are not mandatory at the moment).
|
||||
#[must_use]
|
||||
pub fn po_token_cache(mut self) -> Self {
|
||||
self.po_token_cache = true;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RustyPipe {
|
||||
|
|
@ -2057,11 +2085,14 @@ impl RustyPipeQuery {
|
|||
}
|
||||
|
||||
/// Get PO tokens
|
||||
async fn get_po_tokens(&self, idents: &[&str]) -> Result<Option<Vec<String>>, Error> {
|
||||
let bg = match self.client.inner.botguard.as_ref() {
|
||||
Some(bg) => bg,
|
||||
None => return Ok(None),
|
||||
};
|
||||
async fn get_po_tokens(&self, idents: &[&str]) -> Result<(Vec<String>, OffsetDateTime), Error> {
|
||||
let bg = self
|
||||
.client
|
||||
.inner
|
||||
.botguard
|
||||
.as_ref()
|
||||
.ok_or(ExtractionError::Botguard("not enabled".into()))?;
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let cmd = tokio::process::Command::new(&bg.program)
|
||||
.arg("--snapshot-file")
|
||||
|
|
@ -2079,28 +2110,62 @@ impl RustyPipeQuery {
|
|||
|
||||
let output = String::from_utf8(cmd.stdout)
|
||||
.map_err(|e| Error::Extraction(ExtractionError::Botguard(e.to_string().into())))?;
|
||||
let tokens = output
|
||||
.split_whitespace()
|
||||
.take(idents.len())
|
||||
.map(str::to_owned)
|
||||
.collect::<Vec<_>>();
|
||||
if tokens.len() != idents.len() {
|
||||
return Err(Error::Extraction(ExtractionError::Botguard(
|
||||
"too few tokens returned".into(),
|
||||
)));
|
||||
|
||||
let mut words = output.split_whitespace();
|
||||
let mut tokens = Vec::with_capacity(idents.len());
|
||||
for _ in 0..idents.len() {
|
||||
tokens.push(
|
||||
words
|
||||
.next()
|
||||
.ok_or(ExtractionError::Botguard("too few tokens returned".into()))?
|
||||
.to_owned(),
|
||||
);
|
||||
}
|
||||
|
||||
let mut valid_until = None;
|
||||
for word in words {
|
||||
if let Some((k, v)) = word.split_once('=') {
|
||||
if k == "valid_until" {
|
||||
valid_until = Some(
|
||||
v.parse::<i64>()
|
||||
.ok()
|
||||
.and_then(|x| OffsetDateTime::from_unix_timestamp(x).ok())
|
||||
.ok_or(ExtractionError::Botguard(
|
||||
format!("invalid validity date: {v}").into(),
|
||||
))?,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::debug!("generated PO token (took {:?})", start.elapsed());
|
||||
Ok(Some(tokens))
|
||||
Ok((
|
||||
tokens,
|
||||
valid_until.unwrap_or_else(|| OffsetDateTime::now_utc() + time::Duration::hours(12)),
|
||||
))
|
||||
}
|
||||
|
||||
/// Get the session-bound PO token for the given visitor data, preferring the cache
///
/// A cached token that is still valid is returned directly. Otherwise a new token
/// is generated, stored in the visitor data cache and returned.
async fn get_session_po_token(&self, visitor_data: &str) -> Result<PoToken, Error> {
    let pot_cache = &self.client.inner.visitor_data_cache;

    match pot_cache.get_pot(visitor_data) {
        Some(cached) => Ok(cached),
        None => {
            let fresh = self.get_po_token(visitor_data).await?;
            pot_cache.store_pot(visitor_data, fresh.clone());
            Ok(fresh)
        }
    }
}
|
||||
|
||||
/// Get a PO token
|
||||
pub async fn get_po_token<S: AsRef<str>>(self, ident: S) -> Result<String, Error> {
|
||||
self.get_po_tokens(&[ident.as_ref()])
|
||||
.await?
|
||||
.ok_or(Error::Extraction(ExtractionError::Botguard(
|
||||
"not enabled".into(),
|
||||
)))
|
||||
.map(|res| res.into_iter().next().unwrap())
|
||||
pub async fn get_po_token<S: AsRef<str>>(&self, ident: S) -> Result<PoToken, Error> {
|
||||
let (tokens, valid_until) = self.get_po_tokens(&[ident.as_ref()]).await?;
|
||||
|
||||
Ok(PoToken {
|
||||
po_token: tokens.into_iter().next().unwrap(),
|
||||
valid_until,
|
||||
})
|
||||
}
|
||||
|
||||
async fn yt_request_attempt<R: DeserializeOwned + MapResponse<M> + Debug, M>(
|
||||
|
|
@ -2393,7 +2458,7 @@ struct MapRespCtx<'a> {
|
|||
client_type: ClientType,
|
||||
artist: Option<ArtistId>,
|
||||
authenticated: bool,
|
||||
session_po_token: Option<&'a str>,
|
||||
session_po_token: Option<PoToken>,
|
||||
}
|
||||
|
||||
/// Options to give to the mapper when making requests;
|
||||
|
|
@ -2404,7 +2469,7 @@ struct MapRespOptions<'a> {
|
|||
deobf: Option<&'a DeobfData>,
|
||||
artist: Option<ArtistId>,
|
||||
unlocalized: bool,
|
||||
session_po_token: Option<&'a str>,
|
||||
session_po_token: Option<PoToken>,
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_lifetimes)]
|
||||
|
|
@ -2497,8 +2562,13 @@ mod tests {
|
|||
let po_token = rp.query().get_po_token(ident).await.unwrap();
|
||||
|
||||
let token_bts = data_encoding::BASE64URL
|
||||
.decode(po_token.as_bytes())
|
||||
.decode(po_token.po_token.as_bytes())
|
||||
.unwrap();
|
||||
assert_eq!(token_bts.len(), ident.len() + 74);
|
||||
assert!(
|
||||
po_token.valid_until > OffsetDateTime::now_utc() + time::Duration::minutes(30),
|
||||
"valid until {}",
|
||||
po_token.valid_until
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use std::{
|
|||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use serde::Serialize;
|
||||
use time::OffsetDateTime;
|
||||
use url::Url;
|
||||
|
||||
use crate::{
|
||||
|
|
@ -25,7 +26,7 @@ use super::{
|
|||
self,
|
||||
player::{self, Format},
|
||||
},
|
||||
ClientType, MapRespCtx, MapRespOptions, MapResponse, MapResult, RustyPipeQuery,
|
||||
ClientType, MapRespCtx, MapRespOptions, MapResponse, MapResult, PoToken, RustyPipeQuery,
|
||||
};
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
|
|
@ -142,6 +143,34 @@ impl RustyPipeQuery {
|
|||
Err(last_e.unwrap_or(Error::Other("no clients".into())))
|
||||
}
|
||||
|
||||
async fn get_player_po_token(
|
||||
&self,
|
||||
video_id: &str,
|
||||
visitor_data: &str,
|
||||
) -> Result<(Option<ServiceIntegrity>, Option<PoToken>), Error> {
|
||||
if let Some(bg) = &self.client.inner.botguard {
|
||||
if bg.po_token_cache {
|
||||
let session_token = self.get_session_po_token(visitor_data).await?;
|
||||
Ok((None, Some(session_token)))
|
||||
} else {
|
||||
let (po_tokens, valid_until) =
|
||||
self.get_po_tokens(&[video_id, visitor_data]).await?;
|
||||
let mut po_tokens = po_tokens.into_iter();
|
||||
let po_token = po_tokens.next().unwrap();
|
||||
let session_po_token = po_tokens.next().unwrap();
|
||||
Ok((
|
||||
Some(ServiceIntegrity { po_token }),
|
||||
Some(PoToken {
|
||||
po_token: session_po_token,
|
||||
valid_until,
|
||||
}),
|
||||
))
|
||||
}
|
||||
} else {
|
||||
Ok((None, None))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get YouTube player data (video/audio streams + basic metadata) using the specified client
|
||||
#[tracing::instrument(skip(self), level = "error")]
|
||||
pub async fn player_from_client<S: AsRef<str> + Debug>(
|
||||
|
|
@ -150,7 +179,6 @@ impl RustyPipeQuery {
|
|||
client_type: ClientType,
|
||||
) -> Result<VideoPlayer, Error> {
|
||||
let video_id = video_id.as_ref();
|
||||
|
||||
let visitor_data = self.get_visitor_data(false).await?;
|
||||
|
||||
let (deobf, (service_integrity_dimensions, session_po_token)) = tokio::try_join!(
|
||||
|
|
@ -163,15 +191,7 @@ impl RustyPipeQuery {
|
|||
},
|
||||
async {
|
||||
if client_type.needs_po_token() {
|
||||
if let Some(po_tokens) = self.get_po_tokens(&[video_id, &visitor_data]).await? {
|
||||
let mut po_tokens = po_tokens.into_iter();
|
||||
let po_token = po_tokens.next().unwrap();
|
||||
let session_po_token = po_tokens.next().unwrap();
|
||||
|
||||
Ok((Some(ServiceIntegrity { po_token }), Some(session_po_token)))
|
||||
} else {
|
||||
Ok((None, None))
|
||||
}
|
||||
self.get_player_po_token(video_id, &visitor_data).await
|
||||
} else {
|
||||
Ok((None, None))
|
||||
}
|
||||
|
|
@ -203,7 +223,7 @@ impl RustyPipeQuery {
|
|||
visitor_data: Some(&visitor_data),
|
||||
deobf: deobf.as_ref(),
|
||||
unlocalized: true,
|
||||
session_po_token: session_po_token.as_deref(),
|
||||
session_po_token,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
|
|
@ -369,7 +389,10 @@ impl MapResponse<VideoPlayer> for response::Player {
|
|||
};
|
||||
|
||||
let streams = if !is_live {
|
||||
let mut mapper = StreamsMapper::new(ctx.deobf, ctx.session_po_token)?;
|
||||
let mut mapper = StreamsMapper::new(
|
||||
ctx.deobf,
|
||||
ctx.session_po_token.as_ref().map(|t| t.po_token.as_str()),
|
||||
)?;
|
||||
mapper.map_streams(streaming_data.formats);
|
||||
mapper.map_streams(streaming_data.adaptive_formats);
|
||||
let mut res = mapper.output()?;
|
||||
|
|
@ -461,6 +484,12 @@ impl MapResponse<VideoPlayer> for response::Player {
|
|||
drm_session_id,
|
||||
});
|
||||
|
||||
let mut valid_until = OffsetDateTime::now_utc()
|
||||
+ time::Duration::seconds(streaming_data.expires_in_seconds.into());
|
||||
if let Some(pot) = &ctx.session_po_token {
|
||||
valid_until = valid_until.min(pot.valid_until);
|
||||
}
|
||||
|
||||
Ok(MapResult {
|
||||
c: VideoPlayer {
|
||||
details: video_info,
|
||||
|
|
@ -469,6 +498,7 @@ impl MapResponse<VideoPlayer> for response::Player {
|
|||
audio_streams: streams.audio_streams,
|
||||
subtitles,
|
||||
expires_in_seconds: streaming_data.expires_in_seconds,
|
||||
valid_until,
|
||||
hls_manifest_url: streaming_data.hls_manifest_url,
|
||||
dash_manifest_url: streaming_data.dash_manifest_url,
|
||||
preview_frames,
|
||||
|
|
@ -940,17 +970,8 @@ mod tests {
|
|||
"deserialization/mapping warnings: {:?}",
|
||||
map_res.warnings
|
||||
);
|
||||
let is_desktop = name == "desktop" || name == "desktopmusic";
|
||||
insta::assert_ron_snapshot!(format!("map_player_data_{name}"), map_res.c, {
|
||||
".details.publish_date" => insta::dynamic_redaction(move |value, _path| {
|
||||
if is_desktop {
|
||||
assert!(value.as_str().unwrap().starts_with("2019-05-30T00:00:00"));
|
||||
"2019-05-30T00:00:00"
|
||||
} else {
|
||||
assert_eq!(value, insta::internals::Content::None);
|
||||
"~"
|
||||
}
|
||||
}),
|
||||
".valid_until" => "[date]"
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -462,6 +462,7 @@ VideoPlayer(
|
|||
),
|
||||
],
|
||||
expires_in_seconds: 21540,
|
||||
valid_until: "[date]",
|
||||
hls_manifest_url: None,
|
||||
dash_manifest_url: Some("https://manifest.googlevideo.com/api/manifest/dash/expire/1659481355/ei/q1jpYtOPEYSBgQeHmqbwAQ/ip/2003%3Ade%3Aaf0e%3A2f00%3Ade47%3A297%3Aa6db%3A774e/id/a4fbddf14c6649b4/source/youtube/requiressl/yes/playback_host/rr5---sn-h0jeenek.googlevideo.com/mh/mQ/mm/31%2C29/mn/sn-h0jeenek%2Csn-h0jelnez/ms/au%2Crdu/mv/m/mvi/5/pl/37/hfr/1/as/fmp4_audio_clear%2Cfmp4_sd_hd_clear/initcwndbps/1527500/vprv/1/mt/1659459429/fvip/4/itag_bl/376%2C377%2C384%2C385%2C612%2C613%2C617%2C619%2C623%2C628%2C655%2C656%2C660%2C662%2C666%2C671/keepalive/yes/fexp/24001373%2C24007246/itag/0/sparams/expire%2Cei%2Cip%2Cid%2Csource%2Crequiressl%2Chfr%2Cas%2Cvprv%2Citag/sig/AOq0QJ8wRAIgMm4a_MIHA3YUszKeruSy3exs5JwNjJAyLAwxL0yPdNMCIANb9GDMSTp_NT-PPhbvYMwRULJ5a9BO6MYD9FuWprC1/lsparams/playback_host%2Cmh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps/lsig/AG3C_xAwRQIgETSOwhwWVMy7gmrFXZlJu655ToLzSwOEsT16oRyrWhACIQDkvOEw1fImz5omu4iVIRNFe-z-JC9v8WUyx281dW2NOw%3D%3D"),
|
||||
preview_frames: [
|
||||
|
|
|
|||
|
|
@ -585,6 +585,7 @@ VideoPlayer(
|
|||
),
|
||||
],
|
||||
expires_in_seconds: 21540,
|
||||
valid_until: "[date]",
|
||||
hls_manifest_url: None,
|
||||
dash_manifest_url: Some("https://manifest.googlevideo.com/api/manifest/dash/expire/1659481355/ei/q1jpYtq3BJCX1gKVyJGQDg/ip/2003%3Ade%3Aaf0e%3A2f00%3Ade47%3A297%3Aa6db%3A774e/id/a4fbddf14c6649b4/source/youtube/requiressl/yes/playback_host/rr4---sn-h0jelnez.googlevideo.com/mh/mQ/mm/31%2C26/mn/sn-h0jelnez%2Csn-4g5edn6k/ms/au%2Conr/mv/m/mvi/4/pl/37/hfr/all/as/fmp4_audio_clear%2Cwebm_audio_clear%2Cwebm2_audio_clear%2Cfmp4_sd_hd_clear%2Cwebm2_sd_hd_clear/initcwndbps/1513750/spc/lT-KhrZGE2opztWyVdAtyUNlb8dXPDs/vprv/1/mt/1659459429/fvip/4/keepalive/yes/fexp/24001373%2C24007246/itag/0/sparams/expire%2Cei%2Cip%2Cid%2Csource%2Crequiressl%2Chfr%2Cas%2Cspc%2Cvprv%2Citag/sig/AOq0QJ8wRgIhAPEjHK19PKVHqQeia6WF4qubuMYk74LGi8F8lk5ZMPkFAiEAsaB2pKQWBvuPnNUnbdQXHc-izgsHJUP793woC2xNJlg%3D/lsparams/playback_host%2Cmh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps/lsig/AG3C_xAwRQIgOY4xu4H9wqPVZ7vF2i0hFcOnqrur1XGoA43a7ZEuuSUCIQCyPxBKXUQrKFmknNEGpX5GSWySKgMw_xHBikWpKpKwvg%3D%3D"),
|
||||
preview_frames: [
|
||||
|
|
|
|||
|
|
@ -397,6 +397,7 @@ VideoPlayer(
|
|||
),
|
||||
],
|
||||
expires_in_seconds: 21540,
|
||||
valid_until: "[date]",
|
||||
hls_manifest_url: None,
|
||||
dash_manifest_url: Some("https://manifest.googlevideo.com/api/manifest/dash/expire/1659487474/ei/knDpYub6BojEgAf6jbLgDw/ip/2003%3Ade%3Aaf0e%3A2f00%3Ade47%3A297%3Aa6db%3A774e/id/a4fbddf14c6649b4/source/youtube/requiressl/yes/playback_host/rr5---sn-h0jeenek.googlevideo.com/mh/mQ/mm/31%2C29/mn/sn-h0jeenek%2Csn-h0jelnez/ms/au%2Crdu/mv/m/mvi/5/pl/37/hfr/all/as/fmp4_audio_clear%2Cwebm_audio_clear%2Cwebm2_audio_clear%2Cfmp4_sd_hd_clear%2Cwebm2_sd_hd_clear/initcwndbps/1418750/spc/lT-Khox4YuJQ2wmH79zYALRvsWTPCUc/vprv/1/mt/1659465669/fvip/4/keepalive/yes/fexp/24001373%2C24007246/itag/0/sparams/expire%2Cei%2Cip%2Cid%2Csource%2Crequiressl%2Chfr%2Cas%2Cspc%2Cvprv%2Citag/sig/AOq0QJ8wRAIgErABhAEaoKHUDu9dDbpxE_8gR4b8WWAi61fnu8UKnuICIEYrEKcHvqHdO4V3R7cvSGwi_HGH34IlQsKbziOfMBov/lsparams/playback_host%2Cmh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps/lsig/AG3C_xAwRQIgJxHmH0Sxo3cY_pW_ZzQ3hW9-7oz6K_pZWcUdrDDQ2sQCIQDJYNINQwLgKelgbO3CZYx7sMxdUAFpWdokmRBQ77vwvw%3D%3D"),
|
||||
preview_frames: [
|
||||
|
|
|
|||
|
|
@ -168,6 +168,7 @@ VideoPlayer(
|
|||
),
|
||||
],
|
||||
expires_in_seconds: 21540,
|
||||
valid_until: "[date]",
|
||||
hls_manifest_url: Some("https://manifest.googlevideo.com/api/manifest/hls_variant/expire/1659481355/ei/q1jpYq-xHs7NgQev0bfwAQ/ip/2003%3Ade%3Aaf0e%3A2f00%3Ade47%3A297%3Aa6db%3A774e/id/a4fbddf14c6649b4/source/youtube/requiressl/yes/playback_host/rr4---sn-h0jelnez.googlevideo.com/mh/mQ/mm/31%2C29/mn/sn-h0jelnez%2Csn-h0jeenek/ms/au%2Crdu/mv/m/mvi/4/pl/37/hfr/1/demuxed/1/tts_caps/1/maudio/1/initcwndbps/1513750/vprv/1/go/1/mt/1659459429/fvip/5/nvgoi/1/short_key/1/ncsapi/1/keepalive/yes/fexp/24001373%2C24007246/dover/13/itag/0/playlist_type/DVR/sparams/expire%2Cei%2Cip%2Cid%2Csource%2Crequiressl%2Chfr%2Cdemuxed%2Ctts_caps%2Cmaudio%2Cvprv%2Cgo%2Citag%2Cplaylist_type/sig/AOq0QJ8wRQIhAIYnEHvIgJtJ8hehAXNtVY3qsgsq_GdOhWf2hkJZe6lCAiBxaRY_nubYp6hBizcAg_KFkKnkG-t2XYLRQ5wGdM3AjA%3D%3D/lsparams/playback_host%2Cmh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps/lsig/AG3C_xAwRgIhAM_91Kk_0VLuSsR6nLCY7LdtWojyRAzXSScd_X9ShRROAiEA1AF4VY04F71NsAI8_j3iqjuXnWL9s6NoXHq7P8-bHx8%3D/file/index.m3u8"),
|
||||
dash_manifest_url: None,
|
||||
preview_frames: [
|
||||
|
|
|
|||
|
|
@ -517,6 +517,7 @@ VideoPlayer(
|
|||
),
|
||||
],
|
||||
expires_in_seconds: 21540,
|
||||
valid_until: "[date]",
|
||||
hls_manifest_url: None,
|
||||
dash_manifest_url: None,
|
||||
preview_frames: [
|
||||
|
|
|
|||
|
|
@ -136,7 +136,13 @@ pub struct VideoPlayer {
|
|||
/// List of subtitles
|
||||
pub subtitles: Vec<Subtitle>,
|
||||
/// Lifetime of the stream URLs in seconds
|
||||
///
|
||||
/// **Note:** use the `valid_until` value to check if the stream URLs are still valid,
|
||||
/// since it takes PO token lifetime into account.
|
||||
pub expires_in_seconds: u32,
|
||||
/// Date until which the stream URLs are valid
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub valid_until: OffsetDateTime,
|
||||
/// HLS manifest URL (for livestreams)
|
||||
pub hls_manifest_url: Option<String>,
|
||||
/// Dash manifest URL (for livestreams)
|
||||
|
|
|
|||
|
|
@ -1,12 +1,16 @@
|
|||
use std::sync::{atomic::AtomicU32, Arc, RwLock};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{atomic::AtomicU32, Arc, RwLock},
|
||||
};
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::Rng;
|
||||
use regex::Regex;
|
||||
use reqwest::{header, Client};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::{
|
||||
client::{CONSENT_COOKIE, YOUTUBE_MUSIC_HOME_URL},
|
||||
client::{PoToken, CONSENT_COOKIE, YOUTUBE_MUSIC_HOME_URL},
|
||||
error::{Error, ExtractionError},
|
||||
util,
|
||||
};
|
||||
|
|
@ -29,23 +33,27 @@ pub struct VisitorDataCache {
|
|||
/// Shared state of the visitor data cache
struct VisitorDataCacheRef {
|
||||
/// Request counter; compared against `req_limit` and reset to 0 when new visitor data is added
req_counter: AtomicU32,
|
||||
/// Cached visitor data values; the oldest entries (front of the list) are removed first
visitor_data: RwLock<Vec<String>>,
|
||||
/// Session-bound PO tokens keyed by visitor data; an entry is dropped together with its visitor data
session_potoken: RwLock<HashMap<String, PoToken>>,
|
||||
/// HTTP client (NOTE(review): presumably used to fetch new visitor data — confirm)
http: Client,
|
||||
/// Number of requests after which a new token is requested
|
||||
req_limit: u32,
|
||||
/// Eviction threshold of the cache
///
/// The constructor stores the requested maximum size minus one; entries beyond this
/// count are removed before a new one is pushed.
|
||||
max_size: usize,
|
||||
}
|
||||
|
||||
/// Matches a `"visitorData":"…"` key/value pair and captures the value
/// (word characters, `-` and `%`)
static VISITOR_DATA_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r#""visitorData":"([\w\d_\-%]+?)""#).unwrap());
|
||||
/// Number of requests after which a new token is requested
|
||||
const REQ_LIMIT: u32 = 50;
|
||||
/// Maximum size of the cache
|
||||
const MAX_SIZE: usize = 20;
|
||||
|
||||
impl VisitorDataCache {
|
||||
pub fn new(http: Client) -> Self {
|
||||
pub fn new(http: Client, req_limit: u32, max_size: usize) -> Self {
|
||||
Self {
|
||||
inner: VisitorDataCacheRef {
|
||||
req_counter: Default::default(),
|
||||
visitor_data: Default::default(),
|
||||
session_potoken: Default::default(),
|
||||
http,
|
||||
req_limit,
|
||||
max_size: max_size - 1,
|
||||
}
|
||||
.into(),
|
||||
}
|
||||
|
|
@ -107,8 +115,12 @@ impl VisitorDataCache {
|
|||
.req_counter
|
||||
.store(0, std::sync::atomic::Ordering::Relaxed);
|
||||
let mut vds = self.inner.visitor_data.write().unwrap();
|
||||
for _ in 0..(vds.len().saturating_sub(MAX_SIZE)) {
|
||||
for _ in 0..(vds.len().saturating_sub(self.inner.max_size)) {
|
||||
let rem = vds.remove(0);
|
||||
{
|
||||
let mut pots = self.inner.session_potoken.write().unwrap();
|
||||
pots.remove(&rem);
|
||||
}
|
||||
tracing::debug!("visitor data {rem} removed from cache");
|
||||
}
|
||||
vds.push(vd.to_owned());
|
||||
|
|
@ -122,7 +134,7 @@ impl VisitorDataCache {
|
|||
.inner
|
||||
.req_counter
|
||||
.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
|
||||
>= REQ_LIMIT
|
||||
>= self.inner.req_limit
|
||||
{
|
||||
self.inner
|
||||
.req_counter
|
||||
|
|
@ -148,9 +160,26 @@ impl VisitorDataCache {
|
|||
let mut vds = self.inner.visitor_data.write().unwrap();
|
||||
if let Some(i) = vds.iter().position(|x| x == visitor_data) {
|
||||
vds.remove(i);
|
||||
let mut pots = self.inner.session_potoken.write().unwrap();
|
||||
pots.remove(visitor_data);
|
||||
tracing::debug!("visitor data {visitor_data} removed from cache");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn store_pot(&self, visitor_data: &str, po_token: PoToken) {
|
||||
let mut pots = self.inner.session_potoken.write().unwrap();
|
||||
pots.insert(visitor_data.to_owned(), po_token);
|
||||
}
|
||||
|
||||
pub fn get_pot(&self, visitor_data: &str) -> Option<PoToken> {
|
||||
let pots = self.inner.session_potoken.read().unwrap();
|
||||
if let Some(entry) = pots.get(visitor_data) {
|
||||
if entry.valid_until > OffsetDateTime::now_utc() {
|
||||
return Some(entry.clone());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -166,13 +195,16 @@ mod tests {
|
|||
#[tokio::test]
|
||||
#[traced_test]
|
||||
async fn get_visitor_data() {
|
||||
let cache =
|
||||
VisitorDataCache::new(Client::builder().user_agent(DEFAULT_UA).build().unwrap());
|
||||
let cache = VisitorDataCache::new(
|
||||
Client::builder().user_agent(DEFAULT_UA).build().unwrap(),
|
||||
2,
|
||||
2,
|
||||
);
|
||||
// Get initial visitor data
|
||||
let v1 = cache.get().await.unwrap();
|
||||
|
||||
// Run as many request as necessary to fetch second visitor data
|
||||
for _ in 0..=REQ_LIMIT {
|
||||
for _ in 0..=cache.inner.req_limit {
|
||||
let got = cache.get().await.unwrap();
|
||||
assert_eq!(got, v1);
|
||||
}
|
||||
|
|
@ -186,4 +218,32 @@ mod tests {
|
|||
let vds_len = cache.inner.visitor_data.read().unwrap().len();
|
||||
assert_eq!(vds_len, 2);
|
||||
}
|
||||
|
||||
// Checks that a stored session PO token is returned by `get_pot` and is no longer
// available once its visitor data has left the cache.
#[tokio::test]
|
||||
#[traced_test]
|
||||
async fn cache_potoken() {
|
||||
// Positional arguments after the HTTP client: req_limit = 1, max_size = 2
let cache = VisitorDataCache::new(
|
||||
Client::builder().user_agent(DEFAULT_UA).build().unwrap(),
|
||||
1,
|
||||
2,
|
||||
);
|
||||
let v1 = cache.get().await.unwrap();
|
||||
// Token valid for one hour, so expiry cannot be the reason for the later `None`
let pot1 = PoToken {
|
||||
po_token: "pot1".to_owned(),
|
||||
valid_until: OffsetDateTime::now_utc() + time::Duration::hours(1),
|
||||
};
|
||||
cache.store_pot(&v1, pot1.clone());
|
||||
assert_eq!(cache.get_pot(&v1).unwrap(), pot1);
|
||||
|
||||
// NOTE(review): presumably these requests exceed req_limit so that fresh visitor data
// is fetched and v1 gets evicted — confirm against `VisitorDataCache::get`
for _ in 0..4 {
|
||||
cache.get().await.unwrap();
|
||||
}
|
||||
// NOTE(review): the wait suggests the rotation completes asynchronously — confirm
tokio::time::sleep(Duration::from_millis(1000)).await;
|
||||
|
||||
// Scoped so the read guard is released before `get_pot` is called
{
|
||||
let vd = cache.inner.visitor_data.read().unwrap();
|
||||
assert!(!vd.contains(&v1), "first token still present");
|
||||
}
|
||||
// The PO token must be gone together with its visitor data
assert_eq!(cache.get_pot(&v1), None);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1650,6 +1650,7 @@
|
|||
}
|
||||
],
|
||||
"expires_in_seconds": 21540,
|
||||
"valid_until": "2025-01-16T12:47:01Z",
|
||||
"hls_manifest_url": null,
|
||||
"dash_manifest_url": "https://manifest.googlevideo.com/api/manifest/dash/expire/1737003630/ei/Dj6IZ_uSLs3l6dsPjd_aWQ/ip/93.235.184.108/id/5c0488f533287530/source/youtube/requiressl/yes/xpc/EgVo2aDSNQ%3D%3D/playback_host/rr5---sn-h0jeener.googlevideo.com/met/1736982030%2C/mh/3d/mm/31%2C29/mn/sn-h0jeener%2Csn-h0jelnes/ms/au%2Crdu/mv/m/mvi/5/pl/26/rms/au%2Cau/tx/51357437/txs/51357435%2C51357436%2C51357437/ctier/A/as/fmp4_audio_cenc%2Cfmp4_sd_hd_cenc/pfa/5/gcr/de/initcwndbps/2801250/hightc/yes/siu/1/spc/9kzgDTo16Q_mO7TFjJcMOcNa4IBGqdJV3_zJD2blPLtGQWHzV12Pjt9HGSUEzE5EuxsT3KGLQTHgHKI/vprv/1/rqh/2/mt/1736981586/fvip/4/keepalive/yes/fexp/51326932%2C51335594%2C51353498%2C51355912%2C51384461/itag/0/sparams/expire%2Cei%2Cip%2Cid%2Csource%2Crequiressl%2Cxpc%2Ctx%2Ctxs%2Cctier%2Cas%2Cpfa%2Cgcr%2Chightc%2Csiu%2Cspc%2Cvprv%2Crqh%2Citag/sig/AJfQdSswRAIgRGOt6B9XX33VLI3cBW4IJBjHtjzvGSwSUz5lGrFpegsCIBSRvvj21BsdHibsMhod72dm_FCzt3VZtKP61w-r5n2-/lsparams/playback_host%2Cmet%2Cmh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Crms%2Cinitcwndbps/lsig/AGluJ3MwRQIhALUqrxIiML5XnPfd9h-nywvh8crYVxT_hobOiytW73FFAiBMWQt9t34-kNtSkoiYgyJRfl3Z1bwu6Ss8d2lOZp1ZjA%3D%3D",
|
||||
"preview_frames": [],
|
||||
|
|
|
|||
|
|
@ -1169,6 +1169,7 @@
|
|||
],
|
||||
"subtitles": [],
|
||||
"expires_in_seconds": 21540,
|
||||
"valid_until": "2025-01-16T12:47:01Z",
|
||||
"hls_manifest_url": null,
|
||||
"dash_manifest_url": null,
|
||||
"drm": null,
|
||||
|
|
|
|||
|
|
@ -2187,6 +2187,7 @@
|
|||
}
|
||||
],
|
||||
"expires_in_seconds": 21540,
|
||||
"valid_until": "2025-01-16T12:47:01Z",
|
||||
"hls_manifest_url": null,
|
||||
"dash_manifest_url": null,
|
||||
"preview_frames": [],
|
||||
|
|
|
|||
|
|
@ -506,6 +506,7 @@
|
|||
],
|
||||
"subtitles": [],
|
||||
"expires_in_seconds": 21540,
|
||||
"valid_until": "2025-01-16T12:47:01Z",
|
||||
"hls_manifest_url": null,
|
||||
"dash_manifest_url": null,
|
||||
"preview_frames": [
|
||||
|
|
|
|||
|
|
@ -2844,6 +2844,7 @@ fn rp(lang: Language) -> RustyPipe {
|
|||
.storage_dir(env!("CARGO_MANIFEST_DIR"))
|
||||
.lang(lang)
|
||||
.visitor_data_opt(vdata)
|
||||
.po_token_cache()
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
|
|
|||
Reference in a new issue