diff --git a/README.md b/README.md index ea39e8d..a78b54e 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ inspired by [NewPipe](https://github.com/TeamNewPipe/NewPipeExtractor). - [X] **Search** (with filters) - [X] **Search suggestions** - [X] **Trending** -- [ ] **URL resolver** +- [X] **URL resolver** ### YouTube Music diff --git a/src/client/channel.rs b/src/client/channel.rs index e28eb55..53fab90 100644 --- a/src/client/channel.rs +++ b/src/client/channel.rs @@ -237,11 +237,10 @@ impl MapResponse> for response::Channel { links: meta .primary_links .into_iter() - .map(|l| { - ( - l.title, - util::sanitize_yt_url(&l.navigation_endpoint.url_endpoint.url), - ) + .filter_map(|l| { + l.navigation_endpoint + .url_endpoint + .map(|url| (l.title, util::sanitize_yt_url(&url.url))) }) .collect(), }) diff --git a/src/client/mod.rs b/src/client/mod.rs index cf80d91..1ada971 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -1,12 +1,14 @@ //! YouTube API Client +pub(crate) mod response; + mod channel; mod pagination; mod player; mod playlist; -mod response; mod search; mod trends; +mod url_resolver; mod video_details; #[cfg(feature = "rss")] @@ -1077,9 +1079,16 @@ impl RustyPipeQuery { }; if status.is_client_error() || status.is_server_error() { - let e = Error::HttpStatus(status.into()); - create_report(Level::ERR, Some(e.to_string()), vec![]); - return Err(e); + let status_code = status.as_u16(); + return if status_code == 404 { + Err(Error::Extraction(ExtractionError::ContentUnavailable( + "Not found".into(), + ))) + } else { + let e = Error::HttpStatus(status_code); + create_report(Level::ERR, Some(e.to_string()), vec![]); + Err(e) + }; } match serde_json::from_str::(&resp_str) { diff --git a/src/client/player.rs b/src/client/player.rs index a22e4c0..f70e936 100644 --- a/src/client/player.rs +++ b/src/client/player.rs @@ -285,7 +285,7 @@ impl MapResponse for response::Player { fn cipher_to_url_params( signature_cipher: &str, deobf: &Deobfuscator, -) -> Result<(String, BTreeMap), DeobfError> { +) -> Result<(Url, BTreeMap), DeobfError> { let params: HashMap, Cow> = url::form_urlencoded::parse(signature_cipher.as_bytes()).collect(); diff --git a/src/client/response/channel.rs b/src/client/response/channel.rs index 88a2102..0e730ec 100644 --- a/src/client/response/channel.rs +++ b/src/client/response/channel.rs @@ -2,6 +2,7 @@ use serde::Deserialize; use serde_with::serde_as; use serde_with::{DefaultOnError, VecSkipError}; +use super::url_endpoint::NavigationEndpoint; use super::Thumbnails; use super::{Alert, ChannelBadge}; use super::{ContentRenderer, ContentsRenderer, VideoListItem}; @@ -205,18 +206,6 @@ pub struct PrimaryLink { pub navigation_endpoint: NavigationEndpoint, } -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct NavigationEndpoint { - pub url_endpoint: UrlEndpoint, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct UrlEndpoint { - pub url: String, -} - #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub struct OnResponseReceivedAction { diff --git a/src/client/response/mod.rs b/src/client/response/mod.rs index 6620509..02bfa31 100644 --- a/src/client/response/mod.rs +++ b/src/client/response/mod.rs @@ -4,6 +4,7 @@ pub mod playlist; pub mod playlist_music; pub mod search; pub mod trends; +pub mod url_endpoint; pub mod video_details; pub use channel::Channel; @@ -17,6 +18,7 @@ pub use search::SearchCont; pub use trends::Startpage; pub use trends::StartpageCont; pub use trends::Trending; +pub use url_endpoint::ResolvedUrl; pub use video_details::VideoComments; pub use video_details::VideoDetails; pub use video_details::VideoRecommendations; diff --git a/src/client/response/url_endpoint.rs b/src/client/response/url_endpoint.rs new file mode 100644 index 0000000..c2c7dff --- /dev/null +++ b/src/client/response/url_endpoint.rs @@ -0,0 +1,100 @@ +use serde::Deserialize; +use serde_with::{serde_as, DefaultOnError}; + +use crate::model::UrlTarget; + +/// navigation/resolve_url response model +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ResolvedUrl { + pub endpoint: NavigationEndpoint, +} + +#[serde_as] +#[derive(Debug, Deserialize, Default)] +#[serde(rename_all = "camelCase")] +pub struct NavigationEndpoint { + #[serde(default)] + #[serde_as(deserialize_as = "DefaultOnError")] + pub watch_endpoint: Option, + #[serde(default)] + #[serde_as(deserialize_as = "DefaultOnError")] + pub browse_endpoint: Option, + #[serde(default)] + #[serde_as(deserialize_as = "DefaultOnError")] + pub url_endpoint: Option, + #[serde(default)] + #[serde_as(deserialize_as = "DefaultOnError")] + pub command_metadata: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct WatchEndpoint { + pub video_id: String, + #[serde(default)] + pub start_time_seconds: u32, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowseEndpoint { + pub browse_id: String, + pub browse_endpoint_context_supported_configs: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct UrlEndpoint { + pub url: String, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowseEndpointConfig { + pub browse_endpoint_context_music_config: BrowseEndpointMusicConfig, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowseEndpointMusicConfig { + pub page_type: PageType, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct CommandMetadata { + pub web_command_metadata: WebCommandMetadata, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct WebCommandMetadata { + pub web_page_type: PageType, +} + +#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)] +pub enum PageType { + #[serde(rename = "MUSIC_PAGE_TYPE_ARTIST")] + Artist, + #[serde(rename = "MUSIC_PAGE_TYPE_ALBUM")] + Album, + #[serde( + rename = "WEB_PAGE_TYPE_CHANNEL", + alias = "MUSIC_PAGE_TYPE_USER_CHANNEL" + )] + Channel, + #[serde(rename = "MUSIC_PAGE_TYPE_PLAYLIST", alias = "WEB_PAGE_TYPE_PLAYLIST")] + Playlist, +} + +impl PageType { + pub fn to_url_target(self, id: String) -> UrlTarget { + match self { + PageType::Artist => UrlTarget::Channel { id }, + PageType::Album => UrlTarget::Playlist { id }, + PageType::Channel => UrlTarget::Channel { id }, + PageType::Playlist => UrlTarget::Playlist { id }, + } + } +} diff --git a/src/client/response/video_details.rs b/src/client/response/video_details.rs index 17087c3..f50baf9 100644 --- a/src/client/response/video_details.rs +++ b/src/client/response/video_details.rs @@ -11,8 +11,8 @@ use crate::serializer::{ }; use super::{ - ContinuationEndpoint, ContinuationItemRenderer, Icon, MusicContinuation, Thumbnails, - VideoListItem, VideoOwner, + url_endpoint::BrowseEndpoint, ContinuationEndpoint, ContinuationItemRenderer, Icon, + MusicContinuation, Thumbnails, VideoListItem, VideoOwner, }; /* @@ -561,12 +561,6 @@ pub struct AuthorEndpoint { pub browse_endpoint: BrowseEndpoint, } -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct BrowseEndpoint { - pub browse_id: String, -} - #[derive(Default, Clone, Copy, Debug, Deserialize, PartialEq, Eq)] #[serde(rename_all = "SCREAMING_SNAKE_CASE")] pub enum CommentPriority { diff --git a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20220924_newdesc.snap b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20220924_newdesc.snap index dfe6581..5cef342 100644 --- a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20220924_newdesc.snap +++ b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20220924_newdesc.snap @@ -12,10 +12,12 @@ VideoDetails( url: "https://smarturl.it/aespa_BlackMamba", ), Text("\n🐍The Debut Stage "), - Video( + YouTube( text: "aespa μ—μŠ€νŒŒ \'Black ...", - id: "Ky5RT5oGg0w", - start_time: 0, + target: Video( + id: "Ky5RT5oGg0w", + start_time: 0, + ), ), Text("\n\n🎟\u{fe0f} aespa Showcase SYNK in LA! Tickets now on sale: "), Web( diff --git a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20221011_new_continuation.snap b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20221011_new_continuation.snap index 2ba158d..baca23f 100644 --- a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20221011_new_continuation.snap +++ b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20221011_new_continuation.snap @@ -12,10 +12,12 @@ VideoDetails( url: "https://smarturl.it/aespa_BlackMamba", ), Text("\n🐍The Debut Stage "), - Video( + YouTube( text: "https://youtu.be/Ky5RT5oGg0w", - id: "Ky5RT5oGg0w", - start_time: 0, + target: Video( + id: "Ky5RT5oGg0w", + start_time: 0, + ), ), Text("\n\n🎟\u{fe0f} aespa Showcase SYNK in LA! Tickets now on sale: "), Web( diff --git a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20221011_rec_isr.snap b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20221011_rec_isr.snap index 65ee576..45d6370 100644 --- a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20221011_rec_isr.snap +++ b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_20221011_rec_isr.snap @@ -96,10 +96,12 @@ VideoDetails( Text("\n\nMUSIC CREDIT\n"), Text("-------------------------------------------------"), Text("\nIntro: Laszlo - Supernova\nVideo Link: "), - Video( + YouTube( text: "https://www.youtube.com/watch?v=PKfxm...", - id: "PKfxmFU3lWY", - start_time: 0, + target: Video( + id: "PKfxmFU3lWY", + start_time: 0, + ), ), Text("\niTunes Download Link: "), Web( @@ -112,10 +114,12 @@ VideoDetails( url: "https://soundcloud.com/laszlomusic", ), Text("\n\nOutro: Approaching Nirvana - Sugar High\nVideo Link: "), - Video( + YouTube( text: "https://www.youtube.com/watch?v=ngsGB...", - id: "ngsGBSCDwcI", - start_time: 0, + target: Video( + id: "ngsGBSCDwcI", + start_time: 0, + ), ), Text("\nListen on Spotify: "), Web( @@ -150,88 +154,116 @@ VideoDetails( Text("\n\nCHAPTERS\n"), Text("-------------------------------------------------"), Text("\n"), - Video( + YouTube( text: "0:00", - id: "nFDBxBUfE74", - start_time: 0, + target: Video( + id: "nFDBxBUfE74", + start_time: 0, + ), ), Text(" Intro\n"), - Video( + YouTube( text: "0:42", - id: "nFDBxBUfE74", - start_time: 42, + target: Video( + id: "nFDBxBUfE74", + start_time: 42, + ), ), Text(" The PC Built for Super Efficiency\n"), - Video( + YouTube( text: "2:41", - id: "nFDBxBUfE74", - start_time: 161, + target: Video( + id: "nFDBxBUfE74", + start_time: 161, + ), ), Text(" Our BURIAL ENCLOSURE?!\n"), - Video( + YouTube( text: "3:31", - id: "nFDBxBUfE74", - start_time: 211, + target: Video( + id: "nFDBxBUfE74", + start_time: 211, + ), ), Text(" Our Power Solution (Thanks Jackery!)\n"), - Video( + YouTube( text: "4:47", - id: "nFDBxBUfE74", - start_time: 287, + target: Video( + id: "nFDBxBUfE74", + start_time: 287, + ), ), Text(" Diggin\' Holes\n"), - Video( + YouTube( text: "5:30", - id: "nFDBxBUfE74", - start_time: 330, + target: Video( + id: "nFDBxBUfE74", + start_time: 330, + ), ), Text(" Colonoscopy?\n"), - Video( + YouTube( text: "7:04", - id: "nFDBxBUfE74", - start_time: 424, + target: Video( + id: "nFDBxBUfE74", + start_time: 424, + ), ), Text(" Diggin\' like a man\n"), - Video( + YouTube( text: "8:29", - id: "nFDBxBUfE74", - start_time: 509, + target: Video( + id: "nFDBxBUfE74", + start_time: 509, + ), ), Text(" The world\'s worst woodsman\n"), - Video( + YouTube( text: "9:03", - id: "nFDBxBUfE74", - start_time: 543, + target: Video( + id: "nFDBxBUfE74", + start_time: 543, + ), ), Text(" Backyard cable management\n"), - Video( + YouTube( text: "10:02", - id: "nFDBxBUfE74", - start_time: 602, + target: Video( + id: "nFDBxBUfE74", + start_time: 602, + ), ), Text(" Time to bury this boy\n"), - Video( + YouTube( text: "10:46", - id: "nFDBxBUfE74", - start_time: 646, + target: Video( + id: "nFDBxBUfE74", + start_time: 646, + ), ), Text(" Solar Power Generation\n"), - Video( + YouTube( text: "11:37", - id: "nFDBxBUfE74", - start_time: 697, + target: Video( + id: "nFDBxBUfE74", + start_time: 697, + ), ), Text(" Issues\n"), - Video( + YouTube( text: "12:08", - id: "nFDBxBUfE74", - start_time: 728, + target: Video( + id: "nFDBxBUfE74", + start_time: 728, + ), ), Text(" First Play Test\n"), - Video( + YouTube( text: "13:20", - id: "nFDBxBUfE74", - start_time: 800, + target: Video( + id: "nFDBxBUfE74", + start_time: 800, + ), ), Text(" Conclusion"), ]), diff --git a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_chapters.snap b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_chapters.snap index 12225df..c14605f 100644 --- a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_chapters.snap +++ b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_chapters.snap @@ -92,10 +92,12 @@ VideoDetails( url: "https://www.twitch.tv/linustech", ), Text("\n\nMUSIC CREDIT\n---------------------------------------------------\nIntro: Laszlo - Supernova\nVideo Link: "), - Video( + YouTube( text: "https://www.youtube.com/watch?v=PKfxm...", - id: "PKfxmFU3lWY", - start_time: 0, + target: Video( + id: "PKfxmFU3lWY", + start_time: 0, + ), ), Text("\niTunes Download Link: "), Web( @@ -108,10 +110,12 @@ VideoDetails( url: "https://soundcloud.com/laszlomusic", ), Text("\n\nOutro: Approaching Nirvana - Sugar High\nVideo Link: "), - Video( + YouTube( text: "https://www.youtube.com/watch?v=ngsGB...", - id: "ngsGBSCDwcI", - start_time: 0, + target: Video( + id: "ngsGBSCDwcI", + start_time: 0, + ), ), Text("\nListen on Spotify: "), Web( @@ -144,88 +148,116 @@ VideoDetails( url: "https://geni.us/Ps3XfE", ), Text("\n\nCHAPTERS\n---------------------------------------------------\n"), - Video( + YouTube( text: "0:00", - id: "nFDBxBUfE74", - start_time: 0, + target: Video( + id: "nFDBxBUfE74", + start_time: 0, + ), ), Text(" Intro\n"), - Video( + YouTube( text: "0:42", - id: "nFDBxBUfE74", - start_time: 42, + target: Video( + id: "nFDBxBUfE74", + start_time: 42, + ), ), Text(" The PC Built for Super Efficiency\n"), - Video( + YouTube( text: "2:41", - id: "nFDBxBUfE74", - start_time: 161, + target: Video( + id: "nFDBxBUfE74", + start_time: 161, + ), ), Text(" Our BURIAL ENCLOSURE?!\n"), - Video( + YouTube( text: "3:31", - id: "nFDBxBUfE74", - start_time: 211, + target: Video( + id: "nFDBxBUfE74", + start_time: 211, + ), ), Text(" Our Power Solution (Thanks Jackery!)\n"), - Video( + YouTube( text: "4:47", - id: "nFDBxBUfE74", - start_time: 287, + target: Video( + id: "nFDBxBUfE74", + start_time: 287, + ), ), Text(" Diggin\' Holes\n"), - Video( + YouTube( text: "5:30", - id: "nFDBxBUfE74", - start_time: 330, + target: Video( + id: "nFDBxBUfE74", + start_time: 330, + ), ), Text(" Colonoscopy?\n"), - Video( + YouTube( text: "7:04", - id: "nFDBxBUfE74", - start_time: 424, + target: Video( + id: "nFDBxBUfE74", + start_time: 424, + ), ), Text(" Diggin\' like a man\n"), - Video( + YouTube( text: "8:29", - id: "nFDBxBUfE74", - start_time: 509, + target: Video( + id: "nFDBxBUfE74", + start_time: 509, + ), ), Text(" The world\'s worst woodsman\n"), - Video( + YouTube( text: "9:03", - id: "nFDBxBUfE74", - start_time: 543, + target: Video( + id: "nFDBxBUfE74", + start_time: 543, + ), ), Text(" Backyard cable management\n"), - Video( + YouTube( text: "10:02", - id: "nFDBxBUfE74", - start_time: 602, + target: Video( + id: "nFDBxBUfE74", + start_time: 602, + ), ), Text(" Time to bury this boy\n"), - Video( + YouTube( text: "10:46", - id: "nFDBxBUfE74", - start_time: 646, + target: Video( + id: "nFDBxBUfE74", + start_time: 646, + ), ), Text(" Solar Power Generation\n"), - Video( + YouTube( text: "11:37", - id: "nFDBxBUfE74", - start_time: 697, + target: Video( + id: "nFDBxBUfE74", + start_time: 697, + ), ), Text(" Issues\n"), - Video( + YouTube( text: "12:08", - id: "nFDBxBUfE74", - start_time: 728, + target: Video( + id: "nFDBxBUfE74", + start_time: 728, + ), ), Text(" First Play Test\n"), - Video( + YouTube( text: "13:20", - id: "nFDBxBUfE74", - start_time: 800, + target: Video( + id: "nFDBxBUfE74", + start_time: 800, + ), ), Text(" Conclusion"), ]), diff --git a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_live.snap b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_live.snap index 9f63a5c..d612ab7 100644 --- a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_live.snap +++ b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_live.snap @@ -7,10 +7,12 @@ VideoDetails( title: "🌎 Nasa Live Stream - Earth From Space : Live Views from the ISS", description: RichText([ Text("Live NASA - Views Of Earth from Space\nLive video feed of Earth from the International Space Station (ISS) Cameras\n-----------------------------------------------------------------------------------------------------\nWatch our latest video - The Sun - 4K Video / Solar Flares\n"), - Video( + YouTube( text: "https://www.youtube.com/watch?v=SEzK4...", - id: "SEzK4ZfMvUQ", - start_time: 0, + target: Video( + id: "SEzK4ZfMvUQ", + start_time: 0, + ), ), Text("\n-----------------------------------------------------------------------------------------------------\nNasa ISS live stream from aboard the International Space Station as it circles the earth at 240 miles above the planet, on the edge of space in low earth orbit. \n\nThe station is crewed by NASA astronauts as well as Russian Cosmonauts and a mixture of Japanese, Canadian and European astronauts as well.\n\n"), Text("#nasalive"), diff --git a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_mv.snap b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_mv.snap index 7c77d89..35c54fa 100644 --- a/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_mv.snap +++ b/src/client/snapshots/rustypipe__client__video_details__tests__map_video_details_mv.snap @@ -12,10 +12,12 @@ VideoDetails( url: "https://smarturl.it/aespa_BlackMamba", ), Text("\n🐍The Debut Stage "), - Video( + YouTube( text: "https://youtu.be/Ky5RT5oGg0w", - id: "Ky5RT5oGg0w", - start_time: 0, + target: Video( + id: "Ky5RT5oGg0w", + start_time: 0, + ), ), Text("\n\n🎟\u{fe0f} aespa Showcase SYNK in LA! Tickets now on sale: "), Web( diff --git a/src/client/url_resolver.rs b/src/client/url_resolver.rs new file mode 100644 index 0000000..9e1f15d --- /dev/null +++ b/src/client/url_resolver.rs @@ -0,0 +1,208 @@ +use serde::Serialize; + +use crate::{ + error::{Error, ExtractionError}, + model::UrlTarget, + param::Language, + serializer::MapResult, + util, +}; + +use super::{response, ClientType, MapResponse, RustyPipeQuery, YTContext}; + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct QResolveUrl { + context: YTContext, + url: String, +} + +impl RustyPipeQuery { + pub async fn resolve_url(self, url: &str) -> Result { + let (url, params) = util::url_to_params(url)?; + + let mut is_shortlink = url.domain().and_then(|d| match d { + "youtu.be" => Some(true), + "youtube.com" => Some(false), + _ => None, + }); + let mut path_split = url + .path_segments() + .ok_or_else(|| Error::Other("invalid url: empty path".into()))?; + + let get_start_time = || { + params + .get("t") + .and_then(|t| t.parse::().ok()) + .unwrap_or_default() + }; + + let target = match path_split.next() { + Some("watch") => { + let id = params + .get("v") + .ok_or_else(|| Error::Other("invalid url: no video id".into()))? + .to_string(); + + Ok(UrlTarget::Video { + id, + start_time: get_start_time(), + }) + } + Some("channel") => match path_split.next() { + Some(id) => Ok(UrlTarget::Channel { id: id.to_owned() }), + None => Err(Error::Other("invalid url: no channel id".into())), + }, + Some("playlist") => { + let id = params + .get("list") + .ok_or_else(|| Error::Other("invalid url: no playlist id".into()))? + .to_string(); + + Ok(UrlTarget::Playlist { id }) + } + // Channel vanity URL or youtu.be shortlink + Some(mut id) => { + if id == "c" || id == "user" { + id = path_split.next().unwrap_or(id); + is_shortlink = Some(false); + } + + if id.is_empty() || id == "user" { + return Err(Error::Other( + "invalid url: no channel name / video id".into(), + )); + } + + match is_shortlink { + Some(true) => { + // youtu.be shortlink (e.g. youtu.be/gHzuabZUd6c) + Ok(UrlTarget::Video { + id: id.to_owned(), + start_time: get_start_time(), + }) + } + Some(false) => { + // Vanity URL (e.g. youtube.com/LinusTechTips) has to be resolved by the Innertube API + self._navigation_resolve_url(url.path()).await + } + None => { + // We dont have the original YT domain, so this can be both + // If there is a timestamp parameter, it has to be a video + // First check the innertube API if this is a channel vanity url + // If no channel is found and the identifier has the video ID format, assume it is a video + if !params.contains_key("t") + && util::VANITY_PATH_REGEX + .is_match(url.path()) + .unwrap_or_default() + { + match self._navigation_resolve_url(url.path()).await { + Ok(target) => Ok(target), + Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => { + match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { + true => Ok(UrlTarget::Video { + id: id.to_owned(), + start_time: get_start_time(), + }), + false => Err(Error::Extraction( + ExtractionError::ContentUnavailable(e), + )), + } + } + Err(e) => Err(e), + } + } else if util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { + Ok(UrlTarget::Video { + id: id.to_owned(), + start_time: get_start_time(), + }) + } else { + Err(Error::Other("invalid video / channel id".into())) + } + } + } + } + None => Err(Error::Other("invalid url: empty path".into())), + }?; + + target.validate()?; + Ok(target) + } + + pub async fn resolve_string(self, string: &str) -> Result { + // URL with protocol + if string.starts_with("http://") || string.starts_with("https://") { + self.resolve_url(string).await + } + // URL without protocol + else if string.contains('/') && string.contains('.') { + self.resolve_url(&format!("https://{}", string)).await + } + // ID only + else if util::VIDEO_ID_REGEX.is_match(string).unwrap_or_default() { + Ok(UrlTarget::Video { + id: string.to_owned(), + start_time: 0, + }) + } else if util::CHANNEL_ID_REGEX.is_match(string).unwrap_or_default() { + Ok(UrlTarget::Channel { + id: string.to_owned(), + }) + } else if util::PLAYLIST_ID_REGEX.is_match(string).unwrap_or_default() { + Ok(UrlTarget::Playlist { + id: string.to_owned(), + }) + } + // Channel name only + else if util::VANITY_PATH_REGEX.is_match(string).unwrap_or_default() { + self._navigation_resolve_url(&format!("/{}", string.trim_start_matches('/'))) + .await + } else { + Err(Error::Other("invalid input string".into())) + } + } + + async fn _navigation_resolve_url(&self, url_path: &str) -> Result { + let context = self.get_context(ClientType::Desktop, true).await; + let request_body = QResolveUrl { + context, + url: format!("https://www.youtube.com{}", url_path), + }; + + self.execute_request::( + ClientType::Desktop, + "channel_id", + &request_body.url, + "navigation/resolve_url", + &request_body, + ) + .await + } +} + +impl MapResponse for response::ResolvedUrl { + fn map_response( + self, + _id: &str, + _lang: Language, + _deobf: Option<&crate::deobfuscate::Deobfuscator>, + ) -> Result, ExtractionError> { + let page_type = self + .endpoint + .command_metadata + .ok_or_else(|| ExtractionError::InvalidData("No command metadata".into()))? + .web_command_metadata + .web_page_type; + + let id = self + .endpoint + .browse_endpoint + .ok_or_else(|| ExtractionError::InvalidData("No browse ID".into()))? + .browse_id; + + Ok(MapResult { + c: page_type.to_url_target(id), + warnings: Vec::new(), + }) + } +} diff --git a/src/client/video_details.rs b/src/client/video_details.rs index 5d55af2..e317328 100644 --- a/src/client/video_details.rs +++ b/src/client/video_details.rs @@ -239,7 +239,7 @@ impl MapResponse for response::VideoDetails { page_type, browse_id, } => match page_type { - crate::serializer::text::PageType::Channel => (browse_id, text), + response::url_endpoint::PageType::Channel => (browse_id, text), _ => { return Err(ExtractionError::InvalidData( "invalid channel link type".into(), diff --git a/src/download.rs b/src/download.rs index 9e822ea..1d52655 100644 --- a/src/download.rs +++ b/src/download.rs @@ -88,7 +88,9 @@ async fn download_single_file>( // If the url is from googlevideo, extract file size from clen parameter let (url_base, url_params) = util::url_to_params(url).map_err(|e| DownloadError::Other(e.to_string().into()))?; - let is_gvideo = url_base.ends_with(".googlevideo.com/videoplayback"); + let is_gvideo = url_base + .as_str() + .ends_with(".googlevideo.com/videoplayback"); if is_gvideo { size = url_params.get("clen").and_then(|s| s.parse::().ok()); } diff --git a/src/model/mod.rs b/src/model/mod.rs index 1051122..b78c412 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -11,6 +11,8 @@ use std::ops::Range; use chrono::{DateTime, Local, Utc}; use serde::{Deserialize, Serialize}; +use crate::{error::Error, util}; + use self::richtext::RichText; /* @@ -26,6 +28,64 @@ pub struct Thumbnail { pub height: u32, } +/// Entities extracted from a YouTube URL +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub enum UrlTarget { + Video { id: String, start_time: u32 }, + Channel { id: String }, + Playlist { id: String }, +} + +impl ToString for UrlTarget { + fn to_string(&self) -> String { + self.to_url() + } +} + +impl UrlTarget { + pub fn to_url(&self) -> String { + self.to_url_yt_host("https://www.youtube.com") + } + + pub fn to_url_yt_host(&self, yt_host: &str) -> String { + match self { + UrlTarget::Video { id, start_time, .. } => match start_time { + 0 => format!("{}/watch?v={}", yt_host, id), + n => format!("{}/watch?v={}&t={}s", yt_host, id, n), + }, + UrlTarget::Channel { id } => { + format!("{}/channel/{}", yt_host, id) + } + UrlTarget::Playlist { id } => { + format!("{}/playlist?list={}", yt_host, id) + } + } + } + + pub(crate) fn validate(&self) -> Result<(), Error> { + match self { + UrlTarget::Video { id, .. } => { + match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { + true => Ok(()), + false => Err(Error::Other("invalid video id".into())), + } + } + UrlTarget::Channel { id } => { + match util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() { + true => Ok(()), + false => Err(Error::Other("invalid channel id".into())), + } + } + UrlTarget::Playlist { id } => { + match util::PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default() { + true => Ok(()), + false => Err(Error::Other("invalid playlist id".into())), + } + } + } + } +} + /* #PLAYER */ diff --git a/src/model/richtext.rs b/src/model/richtext.rs index 34192e4..f8ef5f2 100644 --- a/src/model/richtext.rs +++ b/src/model/richtext.rs @@ -2,6 +2,8 @@ use serde::{Deserialize, Serialize}; +use super::UrlTarget; + #[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[non_exhaustive] pub struct RichText(pub Vec); @@ -13,20 +15,8 @@ pub enum TextComponent { Text(String), /// Web link Web { text: String, url: String }, - /// Link to a YouTube video - Video { - text: String, - id: String, - start_time: u32, - }, - /// Link to a YouTube channel - Channel { text: String, id: String }, - /// Link to a YouTube playlist - Playlist { text: String, id: String }, - /// Link to a YouTube Music artist - Artist { text: String, id: String }, - /// Link to a YouTube Music album - Album { text: String, id: String }, + /// Link to a YouTube entity + YouTube { text: String, target: UrlTarget }, } /// Trait for converting rich text to plain text. @@ -60,11 +50,7 @@ impl TextComponent { match self { TextComponent::Text(text) => text, TextComponent::Web { text, .. } => text, - TextComponent::Video { text, .. } => text, - TextComponent::Channel { text, .. } => text, - TextComponent::Playlist { text, .. } => text, - TextComponent::Artist { text, .. } => text, - TextComponent::Album { text, .. } => text, + TextComponent::YouTube { text, .. } => text, } } @@ -72,16 +58,7 @@ impl TextComponent { match self { TextComponent::Text(_) => "".to_owned(), TextComponent::Web { url, .. } => url.to_owned(), - TextComponent::Video { id, start_time, .. } => match start_time { - 0 => format!("{}/watch?v={}", yt_host, id), - n => format!("{}/watch?v={}&t={}s", yt_host, id, n), - }, - TextComponent::Channel { id, .. } | TextComponent::Artist { id, .. } => { - format!("{}/channel/{}", yt_host, id) - } - TextComponent::Playlist { id, .. } | TextComponent::Album { id, .. } => { - format!("{}/playlist?list={}", yt_host, id) - } + TextComponent::YouTube { target, .. } => target.to_url_yt_host(yt_host), } } } diff --git a/src/serializer/text.rs b/src/serializer/text.rs index d1a1574..a09b977 100644 --- a/src/serializer/text.rs +++ b/src/serializer/text.rs @@ -3,9 +3,13 @@ use std::convert::TryFrom; use fancy_regex::Regex; use once_cell::sync::Lazy; use serde::{Deserialize, Deserializer}; -use serde_with::{serde_as, DefaultOnError, DeserializeAs}; +use serde_with::{serde_as, DeserializeAs}; -use crate::util; +use crate::{ + client::response::url_endpoint::{NavigationEndpoint, PageType}, + model::UrlTarget, + util, +}; /// # Text /// @@ -146,84 +150,6 @@ struct AttributedTextOnTap { innertube_command: NavigationEndpoint, } -#[serde_as] -#[derive(Deserialize, Default)] -#[serde(rename_all = "camelCase")] -struct NavigationEndpoint { - #[serde(default)] - #[serde_as(deserialize_as = "DefaultOnError")] - watch_endpoint: Option, - #[serde(default)] - #[serde_as(deserialize_as = "DefaultOnError")] - browse_endpoint: Option, - #[serde(default)] - #[serde_as(deserialize_as = "DefaultOnError")] - url_endpoint: Option, - #[serde(default)] - #[serde_as(deserialize_as = "DefaultOnError")] - command_metadata: Option, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct WatchEndpoint { - video_id: String, - #[serde(default)] - start_time_seconds: u32, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct BrowseEndpoint { - browse_id: String, - browse_endpoint_context_supported_configs: Option, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct UrlEndpoint { - url: String, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct BrowseEndpointConfig { - browse_endpoint_context_music_config: BrowseEndpointMusicConfig, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct BrowseEndpointMusicConfig { - page_type: PageType, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct CommandMetadata { - web_command_metadata: WebCommandMetadata, -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -struct WebCommandMetadata { - web_page_type: PageType, -} - -#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Eq)] -pub enum PageType { - #[serde(rename = "MUSIC_PAGE_TYPE_ARTIST")] - Artist, - #[serde(rename = "MUSIC_PAGE_TYPE_ALBUM")] - Album, - #[serde( - rename = "MUSIC_PAGE_TYPE_USER_CHANNEL", - alias = "WEB_PAGE_TYPE_CHANNEL" - )] - Channel, - #[serde(rename = "MUSIC_PAGE_TYPE_PLAYLIST", alias = "WEB_PAGE_TYPE_PLAYLIST")] - Playlist, -} - impl From for TextComponent { fn from(run: RichTextRun) -> Self { map_text_component(run.text, run.navigation_endpoint) @@ -387,32 +313,20 @@ impl From for crate::model::richtext::TextComponent { text, video_id, start_time, - } => Self::Video { + } => Self::YouTube { text, - id: video_id, - start_time, + target: UrlTarget::Video { + id: video_id, + start_time, + }, }, TextComponent::Browse { text, page_type, browse_id, - } => match page_type { - PageType::Artist => Self::Artist { - text, - id: browse_id, - }, - PageType::Album => Self::Album { - text, - id: browse_id, - }, - PageType::Channel => Self::Channel { - text, - id: browse_id, - }, - PageType::Playlist => Self::Playlist { - text, - id: browse_id, - }, + } => Self::YouTube { + text, + target: page_type.to_url_target(browse_id), }, TextComponent::Web { text, url } => Self::Web { text, diff --git a/src/util/mod.rs b/src/util/mod.rs index d87db0a..82295e7 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -17,6 +17,14 @@ use url::Url; use crate::{error::Error, param::Language}; +pub static VIDEO_ID_REGEX: Lazy = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap()); +pub static CHANNEL_ID_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap()); +pub static PLAYLIST_ID_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^(?:PL|RD)[A-Za-z0-9_-]{30,}$").unwrap()); +pub static VANITY_PATH_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^/?(?:(?:c\/|user\/)?[A-z0-9]+)|(?:@[A-z0-9-_.]+)$").unwrap()); + const CONTENT_PLAYBACK_NONCE_ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; @@ -57,7 +65,7 @@ pub fn generate_content_playback_nonce() -> String { /// Example: /// /// `example.com/api?k1=v1&k2=v2 => example.com/api; {k1: v1, k2: v2}` -pub fn url_to_params(url: &str) -> Result<(String, BTreeMap), Error> { +pub fn url_to_params(url: &str) -> Result<(Url, BTreeMap), Error> { let mut parsed_url = Url::parse(url) .map_err(|e| Error::Other(format!("could not parse url `{}` err: {}", url, e).into()))?; let url_params: BTreeMap = parsed_url @@ -67,7 +75,7 @@ pub fn url_to_params(url: &str) -> Result<(String, BTreeMap), Er parsed_url.set_query(None); - Ok((parsed_url.to_string(), url_params)) + Ok((parsed_url, url_params)) } pub fn urlencode(string: &str) -> String { diff --git a/tests/youtube.rs b/tests/youtube.rs index 636e628..71a4358 100644 --- a/tests/youtube.rs +++ b/tests/youtube.rs @@ -7,7 +7,7 @@ use rustypipe::client::{ClientType, RustyPipe}; use rustypipe::error::{Error, ExtractionError}; use rustypipe::model::richtext::ToPlaintext; use rustypipe::model::{ - AudioCodec, AudioFormat, Channel, SearchItem, Verification, VideoCodec, VideoFormat, + AudioCodec, AudioFormat, Channel, SearchItem, UrlTarget, Verification, VideoCodec, VideoFormat, }; use rustypipe::param::{ search_filter::{self, SearchFilter}, @@ -1205,6 +1205,66 @@ async fn search_suggestion_empty() { assert!(result.is_empty()); } +//#URL RESOLVER + +#[rstest] +#[case("https://www.youtube.com/LinusTechTips", UrlTarget::Channel {id: "UCXuqSBlHAE6Xw-yeJA0Tunw".to_owned()})] +#[case("https://www.youtube.com/@AndroidAuthority", UrlTarget::Channel {id: "UCgyqtNWZmIxTx3b6OxTSALw".to_owned()})] +#[case("https://www.youtube.com/channel/UC5I2hjZYiW9gZPVkvzM8_Cw", UrlTarget::Channel {id: "UC5I2hjZYiW9gZPVkvzM8_Cw".to_owned()})] +#[case("https://www.youtube.com/c", UrlTarget::Channel {id: "UCXE6F2oZzy_6xEXiJiUFo2w".to_owned()})] +#[case("https://www.youtube.com/user/MrBeast6000", UrlTarget::Channel {id: "UCX6OQ3DkcsbYNE6H8uQQuVA".to_owned()})] +#[case("https://www.youtube.com/watch?v=dQw4w9WgXcQ", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})] +#[case("https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=60", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 60})] +#[case("https://www.youtube.com/playlist?list=PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI", UrlTarget::Playlist {id: "PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI".to_owned()})] +#[case("https://www.youtube.com/playlist?list=RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk", UrlTarget::Playlist {id: "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk".to_owned()})] +#[case("https://youtu.be/dQw4w9WgXcQ", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})] +#[case("https://youtu.be/dQw4w9WgXcQ?t=60", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 60})] +#[case("https://youtu.be/dQw4w9WgXcQ", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})] +#[case("https://youtu.be/dQw4w9WgXcQ?t=60", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 60})] +#[case("https://piped.mha.fi/watch?v=dQw4w9WgXcQ", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})] +// Both a video ID and a channel name => returns channel +#[case("https://piped.mha.fi/dQw4w9WgXcQ", UrlTarget::Channel {id: "UCoG6BrhgmivrkcbEHcYtK4Q".to_owned()})] +// Both a video ID and a channel name + video time param => returns video +#[case("https://piped.mha.fi/dQw4w9WgXcQ?t=0", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})] +#[tokio::test] +async fn resolve_url(#[case] url: &str, #[case] expect: UrlTarget) { + let rp = RustyPipe::builder().strict().build(); + let target = rp.query().resolve_url(url).await.unwrap(); + assert_eq!(target, expect); +} + +#[rstest] +#[case("LinusTechTips", UrlTarget::Channel {id: "UCXuqSBlHAE6Xw-yeJA0Tunw".to_owned()})] +#[case("@AndroidAuthority", UrlTarget::Channel {id: "UCgyqtNWZmIxTx3b6OxTSALw".to_owned()})] +#[case("UC5I2hjZYiW9gZPVkvzM8_Cw", UrlTarget::Channel {id: "UC5I2hjZYiW9gZPVkvzM8_Cw".to_owned()})] +#[case("c", UrlTarget::Channel {id: "UCXE6F2oZzy_6xEXiJiUFo2w".to_owned()})] +#[case("user/MrBeast6000", UrlTarget::Channel {id: "UCX6OQ3DkcsbYNE6H8uQQuVA".to_owned()})] +#[case("@AndroidAuthority", UrlTarget::Channel {id: "UCgyqtNWZmIxTx3b6OxTSALw".to_owned()})] +#[case("dQw4w9WgXcQ", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})] +#[case("PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI", UrlTarget::Playlist {id: "PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI".to_owned()})] +#[case("RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk", UrlTarget::Playlist {id: "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk".to_owned()})] +#[tokio::test] +async fn resolve_string(#[case] string: &str, #[case] expect: UrlTarget) { + let rp = RustyPipe::builder().strict().build(); + let target = rp.query().resolve_string(string).await.unwrap(); + assert_eq!(target, expect); +} + +#[tokio::test] +async fn resolve_channel_not_found() { + let rp = RustyPipe::builder().strict().build(); + let err = rp + .query() + .resolve_url("https://www.youtube.com/feeqegnhq3rkwghjq43ruih43io3") + .await + .unwrap_err(); + + assert!(matches!( + err, + Error::Extraction(ExtractionError::ContentUnavailable(_)) + )); +} + //#TRENDS #[tokio::test] @@ -1213,8 +1273,8 @@ async fn startpage() { let result = rp.query().startpage().await.unwrap(); assert!( - result.items.len() > 20, - "expected > 20 items, got {}", + result.items.len() >= 20, + "expected >= 20 items, got {}", result.items.len() ); assert!(!result.is_exhausted()); @@ -1228,8 +1288,8 @@ async fn startpage_cont() { let next = startpage.next(rp.query()).await.unwrap().unwrap(); assert!( - next.items.len() > 20, - "expected > 20 items, got {}", + next.items.len() >= 20, + "expected >= 20 items, got {}", next.items.len() ); assert!(!next.is_exhausted()); @@ -1241,8 +1301,8 @@ async fn trending() { let result = rp.query().trending().await.unwrap(); assert!( - result.len() > 50, - "expected > 50 items, got {}", + result.len() >= 50, + "expected >= 50 items, got {}", result.len() ); }