feat: redirect secondary YT channels to the YTM channel

Squashed commit of the following:

commit 88809265ead6eadfafab4b74091dd1af357b9577
Author: ThetaDev <t.testboy@gmail.com>
Date:   Sat Jan 21 22:16:23 2023 +0100

    feat: redirect secondary YT channels to the YTM channel

commit 02cc120912509f40f45da243ba5d37798b9ff411
Author: ThetaDev <t.testboy@gmail.com>
Date:   Mon Jan 9 23:57:18 2023 +0100

    add artists_no_tracks testfile
This commit is contained in:
ThetaDev 2023-01-21 22:18:25 +01:00
parent f44bc6434a
commit a706a7011b
11 changed files with 13543 additions and 23 deletions

View file

@ -49,6 +49,7 @@ futures = "0.3.21"
ress = "0.11.4"
phf = "0.11.1"
base64 = "0.20.0"
urlencoding = "2.1.2"
quick-xml = { version = "0.26.0", features = ["serialize"], optional = true }
[dev-dependencies]

View file

@ -713,12 +713,13 @@ async fn music_search_suggestion(testfiles: &Path) {
}
async fn music_artist(testfiles: &Path) {
for (name, id) in [
("default", "UClmXPfaYhXOYsNn_QUyheWQ"),
("no_more_albums", "UC_vmjW5e1xEHhYjY2a0kK1A"),
("only_singles", "UCfwCE5VhPMGxNPFxtVv7lRw"),
("no_artist", "UCh8gHdtzO2tXd593_bjErWg"),
("only_more_singles", "UC0aXrjVxG5pZr99v77wZdPQ"),
for (name, id, all_albums) in [
("default", "UClmXPfaYhXOYsNn_QUyheWQ", true),
("no_more_albums", "UC_vmjW5e1xEHhYjY2a0kK1A", true),
("only_singles", "UCfwCE5VhPMGxNPFxtVv7lRw", true),
("no_artist", "UCh8gHdtzO2tXd593_bjErWg", true),
("only_more_singles", "UC0aXrjVxG5pZr99v77wZdPQ", true),
("secondary_channel", "UCC9192yGQD25eBZgFZ84MPw", false),
] {
let mut json_path = testfiles.to_path_buf();
json_path.push("music_artist");
@ -728,7 +729,7 @@ async fn music_artist(testfiles: &Path) {
}
let rp = rp_testfile(&json_path);
rp.query().music_artist(id, true).await.unwrap();
rp.query().music_artist(id, all_albums).await.unwrap();
}
}

View file

@ -34,6 +34,20 @@ impl RustyPipeQuery {
artist_id: S,
all_albums: bool,
) -> Result<MusicArtist, Error> {
let res = self._music_artist(artist_id, all_albums).await;
if let Err(Error::Extraction(ExtractionError::Redirect(id))) = res {
self._music_artist(&id, all_albums).await.map(|x| *x)
} else {
res.map(|x| *x)
}
}
async fn _music_artist<S: AsRef<str>>(
&self,
artist_id: S,
all_albums: bool,
) -> Result<Box<MusicArtist>, Error> {
let artist_id = artist_id.as_ref();
if all_albums {
@ -74,7 +88,7 @@ impl RustyPipeQuery {
artist.albums.append(&mut res);
}
Ok(artist)
Ok(artist.into())
} else {
let context = self.get_context(ClientType::DesktopMusic, true, None).await;
let request_body = QBrowse {
@ -90,6 +104,7 @@ impl RustyPipeQuery {
&request_body,
)
.await
.map(|x: MusicArtist| x.into())
}
}
@ -155,6 +170,21 @@ fn map_artist_page(
let header = res.header.music_immersive_header_renderer;
if let Some(share) = header.share_endpoint {
let pb = share.share_entity_endpoint.serialized_share_entity;
let share_channel_id = urlencoding::decode(&pb)
.ok()
.and_then(|pb| base64::decode(pb.as_bytes()).ok())
.and_then(|pb| util::string_from_pb(pb, 3));
if let Some(share_channel_id) = share_channel_id {
if share_channel_id != id {
return Err(ExtractionError::Redirect(share_channel_id));
}
}
}
let mut content = res.contents.single_column_browse_results_renderer.contents;
let sections = content
.try_swap_remove(0)
@ -390,4 +420,23 @@ mod tests {
);
insta::assert_ron_snapshot!(map_res.c);
}
/// Mapping the stored response of a secondary channel must not yield an artist:
/// the mapper is expected to fail with `ExtractionError::Redirect` carrying the
/// ID of the channel taken from the response's share endpoint.
#[test]
fn map_music_artist_secondary_channel() {
    let json_path = path!("testfiles" / "music_artist" / "artist_secondary_channel.json");
    let reader = BufReader::new(File::open(json_path).unwrap());

    let artist: response::MusicArtist = serde_json::from_reader(reader).unwrap();
    let res: Result<MapResult<MusicArtist>, ExtractionError> =
        artist.map_response("UCLkAepWjdylmXSltofFvsYQ", Language::En, None);

    // Any outcome other than a redirect error fails the test.
    match res.unwrap_err() {
        ExtractionError::Redirect(id) => assert_eq!(id, "UCOR4_bSVIXPsGa4BbCSt60Q"),
        other => panic!("error: {}", other),
    }
}
}

View file

@ -61,8 +61,7 @@ impl RustyPipeQuery {
/// Get YouTube player data (video/audio streams + basic metadata)
pub async fn player<S: AsRef<str>>(&self, video_id: S) -> Result<VideoPlayer, Error> {
let video_id = video_id.as_ref();
let q1 = self.clone();
let android_res = q1.player_from_client(video_id, ClientType::Android).await;
let android_res = self.player_from_client(video_id, ClientType::Android).await;
match android_res {
Ok(res) => Ok(res),

View file

@ -38,6 +38,9 @@ pub(crate) struct MusicHeaderRenderer {
pub description: Option<String>,
#[serde(default)]
pub thumbnail: MusicThumbnailRenderer,
#[serde(default)]
#[serde_as(as = "DefaultOnError")]
pub share_endpoint: Option<ShareEndpoint>,
}
#[derive(Debug, Deserialize)]
@ -54,6 +57,18 @@ pub(crate) struct SubscriptionButtonRenderer {
pub subscriber_count_text: String,
}
/// Share endpoint of a music header.
///
/// Thin wrapper that deserializes the nested `shareEntityEndpoint` object
/// (field names are mapped from camelCase).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ShareEndpoint {
pub share_entity_endpoint: ShareEntityEndpoint,
}
/// Payload of a [`ShareEndpoint`].
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ShareEntityEndpoint {
/// Serialized share entity, deserialized from `serializedShareEntity`.
///
/// The artist page mapper URL-decodes and base64-decodes this value and reads
/// string field 3 of the resulting protobuf message as the channel ID.
pub serialized_share_entity: String,
}
/// Response model for YouTube Music artist album page
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]

View file

@ -87,6 +87,11 @@ pub enum ExtractionError {
/// Android client.
#[error("got wrong result from YT: {0}")]
WrongResult(String),
/// YouTube redirects you to another content ID
///
/// This is used internally for YouTube Music channels that link to a main channel.
#[error("redirecting to: {0}")]
Redirect(String),
/// Warnings occurred during deserialization/mapping
///
/// This error is only returned in strict mode.

View file

@ -2,7 +2,7 @@
use std::collections::BTreeSet;
use crate::util::{self, ProtoBuilder};
use crate::util::ProtoBuilder;
/// YouTube search filter
///
@ -201,7 +201,7 @@ impl SearchFilter {
}
let b64 = base64::encode(pb.bytes);
util::urlencode(&b64)
urlencoding::encode(&b64).to_string()
}
}
@ -236,6 +236,6 @@ mod tests {
#[case(SearchFilter::new().sort(Order::Views), "CAM%253D")]
#[case(SearchFilter::new().sort(Order::Rating), "CAE%253D")]
fn t_filter(#[case] filter: SearchFilter, #[case] expect: &str) {
assert_eq!(util::urlencode(&filter.encode()), expect);
assert_eq!(urlencoding::encode(&filter.encode()), expect);
}
}

View file

@ -4,7 +4,7 @@ mod protobuf;
pub mod dictionary;
pub use date::{month_from_n, now_sec, shift_months, shift_years};
pub use protobuf::ProtoBuilder;
pub use protobuf::{string_from_pb, ProtoBuilder};
use std::{
borrow::{Borrow, Cow},
@ -89,12 +89,6 @@ pub fn url_to_params(url: &str) -> Result<(Url, BTreeMap<String, String>), Error
Ok((parsed_url, url_params))
}
/// URL-encode a string.
///
/// The string is appended as a single key-only entry to an empty
/// `url::form_urlencoded::Serializer` and the serialized output is returned.
pub fn urlencode(string: &str) -> String {
url::form_urlencoded::Serializer::new(String::new())
.append_key_only(string)
.finish()
}
/// Parse a string after removing all non-numeric characters
pub fn parse_numeric<F>(string: &str) -> Result<F, F::Err>
where

View file

@ -54,3 +54,85 @@ impl ProtoBuilder {
self.bytes.append(&mut pb.bytes);
}
}
/// Parse a protobuf base-128 varint from a byte iterator.
///
/// Bytes are consumed from `pb` up to and including the first byte whose
/// continuation bit (`0x80`) is not set. The lower 7 bits of each byte are
/// combined, least significant group first. Bytes following the varint are
/// left in the iterator.
///
/// Returns `None` if
/// - the iterator yields no bytes,
/// - the input ends while the continuation bit is still set (truncated varint), or
/// - the varint is longer than the 10 bytes a 64-bit value can occupy.
fn parse_varint<P: Iterator<Item = u8>>(pb: &mut P) -> Option<u64> {
    let mut result: u64 = 0;
    let mut shift: u32 = 0;

    for b in pb.by_ref() {
        // An 11th byte would need a shift of 70 bits. Shifting a u64 by 64 or
        // more panics in debug builds and wraps in release builds, so malformed
        // input is rejected instead.
        if shift >= u64::BITS {
            return None;
        }

        result |= u64::from(b & 0x7f) << shift;

        if b & 0x80 == 0 {
            return Some(result);
        }
        shift += 7;
    }

    // Either no bytes at all or the last byte still had its continuation bit set.
    None
}
/// Parse a protobuf field key.
///
/// The key is read as a varint; its lowest three bits are the wire type and
/// the remaining bits are the field number. Returns `(field_number, wire_type)`
/// or `None` if no varint could be read.
fn parse_field<P: Iterator<Item = u8>>(pb: &mut P) -> Option<(u32, u8)> {
    let key = parse_varint(pb)?;
    let field_number = (key >> 3) as u32;
    let wire_type = (key & 0x07) as u8;
    Some((field_number, wire_type))
}
/// Extract a string field from a serialized protobuf message.
///
/// The top-level fields of `pb` are scanned in order. The first
/// length-delimited field (wire type 2) whose number equals `field` is
/// returned as a string. Other fields are skipped; length-delimited fields
/// with a different number are skipped as a whole and not searched.
///
/// Returns `None` if
/// - the message contains no such field,
/// - the field content is not valid UTF-8,
/// - an unsupported wire type (anything other than 0, 1, 2, 5) is encountered, or
/// - the data ends before a field is complete.
pub fn string_from_pb<P: IntoIterator<Item = u8>>(pb: P, field: u32) -> Option<String> {
    let mut pb = pb.into_iter();

    while let Some((this_field, wire)) = parse_field(&mut pb) {
        let to_skip: u64 = match wire {
            // varint: consuming the value is all that is needed
            0 => {
                parse_varint(&mut pb)?;
                0
            }
            // fixed 64bit
            1 => 8,
            // fixed 32bit
            5 => 4,
            // length-delimited (string, bytes, embedded message)
            2 => {
                let len = parse_varint(&mut pb)?;
                if this_field == field {
                    // No preallocation: `len` comes from the input and may be bogus.
                    let mut buf = Vec::new();
                    for _ in 0..len {
                        buf.push(pb.next()?);
                    }
                    return String::from_utf8(buf).ok();
                }
                len
            }
            _ => return None,
        };

        // Stop as soon as the data runs out. A malformed length close to
        // u64::MAX would otherwise keep polling the exhausted iterator for
        // that many iterations.
        for _ in 0..to_skip {
            pb.next()?;
        }
    }
    None
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Varints are decoded least significant group first and only the bytes
    /// belonging to the varint are consumed.
    #[test]
    fn t_parse_varint() {
        assert_eq!(parse_varint(&mut vec![0x01u8].into_iter()), Some(1));
        assert_eq!(parse_varint(&mut vec![0xacu8, 0x02].into_iter()), Some(300));

        let mut pb = vec![0x96u8, 0x01, 0x2a].into_iter();
        assert_eq!(parse_varint(&mut pb), Some(150));
        assert_eq!(pb.next(), Some(0x2a));

        assert_eq!(parse_varint(&mut Vec::<u8>::new().into_iter()), None);
    }

    /// A URL-encoded, base64-encoded share entity yields the channel ID stored
    /// in string field 3.
    #[test]
    fn t_parse_proto() {
        let p = "GhhVQzl2cnZOU0wzeGNXR1NrVjg2UkVCU2c%3D";
        let p_bytes = base64::decode(urlencoding::decode(p).unwrap().as_bytes()).unwrap();

        let res = string_from_pb(p_bytes, 3).unwrap();
        assert_eq!(res, "UC9vrvNSL3xcWGSkV86REBSg");
    }
}

File diff suppressed because it is too large Load diff

View file

@ -44,7 +44,7 @@ async fn get_player_from_client(#[case] client_type: ClientType) {
if client_type == ClientType::DesktopMusic {
assert!(player_data.details.description.is_none());
} else {
assert!(player_data.details.description.unwrap().starts_with(
assert!(player_data.details.description.unwrap().contains(
"NCS (NoCopyrightSounds): Empowering Creators through Copyright / Royalty Free Music"
));
}
@ -743,7 +743,10 @@ async fn get_video_details_agegate() {
insta::assert_ron_snapshot!(details.description, @"RichText([])");
assert_eq!(details.channel.id, "UCQT2yul0lr6Ie9qNQNmw-sg");
assert_eq!(details.channel.name, "PrinceOfFALLEN");
assert_eq!(
details.channel.name,
"Dale Earnhardt Juniors Retired YouYoube Channel"
);
assert!(!details.channel.avatar.is_empty(), "no channel avatars");
assert_eq!(details.channel.verification, Verification::None);
assert_gte(
@ -1418,6 +1421,8 @@ async fn music_album_not_found() {
#[case::no_more_albums("no_more_albums", "UCOR4_bSVIXPsGa4BbCSt60Q", true, 15, 0)]
#[case::only_singles("only_singles", "UCfwCE5VhPMGxNPFxtVv7lRw", false, 13, 0)]
#[case::no_artist("no_artist", "UCh8gHdtzO2tXd593_bjErWg", false, 0, 2)]
// querying Trailerpark's secondary YouTube channel should result in the YTM channel being fetched
#[case::secondary_channel("no_more_albums", "UCC9192yGQD25eBZgFZ84MPw", true, 15, 0)]
#[tokio::test]
async fn music_artist(
#[case] name: &str,
@ -2085,7 +2090,7 @@ async fn music_new_albums() {
async fn music_new_videos() {
let rp = RustyPipe::builder().strict().build();
let videos = rp.query().music_new_videos().await.unwrap();
assert_gte(videos.len(), 10, "videos");
assert_gte(videos.len(), 5, "videos");
for video in videos {
assert_video_id(&video.id);