Phase 6 — Search + Channel + Playlist + LinkHandler
Pulls in the read-side extractor surfaces Straw needs at app open
(search bar) + on detail screens (channel + playlist).
src/youtube/linkhandler/
* mod.rs — ACCEPTED_HOSTS allowlist (youtube.com /
youtube-nocookie.com / youtu.be / m.youtube.com /
music.youtube.com); 27 Invidious mirror hosts
intentionally dropped (SPEC §6.6).
* stream.rs — extract_video_id() handles /watch?v= / youtu.be/ /
/embed/ / /shorts/ / /v/ / /live/ / attribution_link;
strict 11-char [A-Za-z0-9_-] validation.
* channel.rs — ChannelIdentifier enum (DirectId / Handle / Custom /
LegacyUser). Resolution to UC… id lands in
youtube/channel.rs.
* playlist.rs — extracts ?list=<PLid> from /playlist and /watch URLs.
* search.rs — SearchFilter enum + params() opaque base64 strings +
uses_music_endpoint() routing flag.
src/youtube/search_extractor.rs
* search(query, filter) → SearchInfo { query, corrected_query,
videos, continuation_token }
* Walks twoColumnSearchResultsRenderer → sectionListRenderer →
itemSectionRenderer → videoRenderer (+ shelfRenderer recursion).
* Parses YT duration strings, view-count abbreviations ('1.5M views'),
publishedTimeText, ownerBadges verified flag, badge LIVE flag.
* Music-search filters route to WEB_REMIX — flagged as not-yet-impl.
src/youtube/suggestion_extractor.rs
* suggestions(query) → Vec<String> via the suggestqueries-clients6
endpoint; handles both XSSI-prefixed and bare JSON responses.
src/youtube/channel.rs
* resolve_handle_to_channel_id() via /youtubei/v1/navigation/resolve_url
* channel_info(ChannelIdentifier) → ChannelInfo {
name, description, avatars, banners, subscriber_count, verified,
recent_videos, videos_continuation
}
* Parses both c4TabbedHeaderRenderer (most common) and the newer
pageHeaderRenderer flavor.
* subscriber_count parser handles K/M/B suffixes.
src/youtube/playlist_extractor.rs
* playlist_info(playlist_id) → PlaylistInfo with first-page video
list + continuation_token. Browses with browseId='VL<id>'.
* Walks playlistMetadataRenderer + playlistSidebarRenderer + the
playlistVideoListRenderer.contents[] for video items.
Tests: 121 lib unit pass (+44 since Phase 5). All previous phase smoke
tests still green.
What's left:
* Phase 6 kiosks (Trending etc) — minor, deferred
* Phase 7 — UniFFI surface swap into Straw (Straw repo work)
* Phase 8 — delete rustypipe (Straw repo work)
This commit is contained in:
parent
b4286b8236
commit
f79d8fb109
10 changed files with 1663 additions and 0 deletions
294
src/youtube/channel.rs
Normal file
294
src/youtube/channel.rs
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
// YoutubeChannelExtractor + helper.resolveChannelId — fetches channel
|
||||
// info via /youtubei/v1/browse. Mirrors NPE
|
||||
// services/youtube/extractors/YoutubeChannelExtractor.java +
|
||||
// YoutubeChannelHelper.java.
|
||||
//
|
||||
// Handle / custom URL / legacy user resolution: NPE issues
|
||||
// `/youtubei/v1/navigation/resolve_url` against the `youtube.com/@handle`
|
||||
// URL, walks `endpoint.browseEndpoint.browseId` to get the UC... id, and
|
||||
// retries the browse call (up to 3 redirect hops in NPE). This port
// currently issues a single resolve_url call and does not follow hops.
|
||||
//
|
||||
// Tab parsing (videos/shorts/live/playlists) is in audit Track D §5 —
|
||||
// `tabs[].tabRenderer.endpoint.browseEndpoint.params` is the magic
|
||||
// base64 needed to land on each tab.
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::downloader::request::Request;
|
||||
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
|
||||
use crate::image::{Image, ImageSet, ResolutionLevel};
|
||||
use crate::newpipe::NewPipe;
|
||||
use crate::stream::StreamInfoItem;
|
||||
use crate::youtube::client_request::build_desktop_envelope;
|
||||
use crate::youtube::constants::*;
|
||||
use crate::youtube::linkhandler::channel::ChannelIdentifier;
|
||||
use crate::youtube::parsing::{web_client_version, youtube_post_headers};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct ChannelInfo {
|
||||
pub channel_id: String,
|
||||
pub url: String,
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
pub avatars: ImageSet,
|
||||
pub banners: ImageSet,
|
||||
pub subscriber_count: i64,
|
||||
pub verified: bool,
|
||||
pub recent_videos: Vec<StreamInfoItem>,
|
||||
pub videos_continuation: Option<String>,
|
||||
}
|
||||
|
||||
pub fn channel_info(identifier: ChannelIdentifier) -> Result<ChannelInfo, ExtractionError> {
|
||||
let resolved = match identifier {
|
||||
ChannelIdentifier::DirectId(id) => id,
|
||||
ChannelIdentifier::Handle(h) => resolve_handle_to_channel_id(&format!("@{h}"))?,
|
||||
ChannelIdentifier::Custom(c) => resolve_handle_to_channel_id(&format!("c/{c}"))?,
|
||||
ChannelIdentifier::LegacyUser(u) => resolve_handle_to_channel_id(&format!("user/{u}"))?,
|
||||
};
|
||||
fetch_channel_browse(&resolved)
|
||||
}
|
||||
|
||||
pub fn resolve_handle_to_channel_id(url_fragment: &str) -> Result<String, ExtractionError> {
|
||||
let downloader = NewPipe::downloader().ok_or(ExtractionError::DownloaderMissing)?;
|
||||
let localization = NewPipe::preferred_localization();
|
||||
let content_country = NewPipe::preferred_content_country();
|
||||
let target_url = format!("https://www.youtube.com/{url_fragment}");
|
||||
let mut envelope = build_desktop_envelope(&localization, &content_country, &web_client_version());
|
||||
if let Value::Object(ref mut map) = envelope {
|
||||
map.insert("url".into(), Value::String(target_url));
|
||||
}
|
||||
let url = format!("{YOUTUBEI_V1_URL}navigation/resolve_url{DISABLE_PRETTY_PRINT_PARAM}");
|
||||
let body = serde_json::to_vec(&envelope).map_err(|e| {
|
||||
ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {e}")))
|
||||
})?;
|
||||
let mut builder = Request::post(&url, body);
|
||||
for (k, v) in youtube_post_headers() {
|
||||
builder = builder.add_header(&k, &v);
|
||||
}
|
||||
let resp = downloader.execute(builder.build())?;
|
||||
if resp.response_code() != 200 {
|
||||
return Err(ExtractionError::Network(NetworkError::Transport(format!(
|
||||
"resolve_url HTTP {}",
|
||||
resp.response_code()
|
||||
))));
|
||||
}
|
||||
let parsed: Value = serde_json::from_str(resp.response_body())
|
||||
.map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))?;
|
||||
parsed
|
||||
.get("endpoint")
|
||||
.and_then(|e| e.get("browseEndpoint"))
|
||||
.and_then(|b| b.get("browseId"))
|
||||
.and_then(|i| i.as_str())
|
||||
.map(String::from)
|
||||
.ok_or_else(|| {
|
||||
ExtractionError::Parsing(ParsingError::MissingField(
|
||||
"endpoint.browseEndpoint.browseId".into(),
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn fetch_channel_browse(channel_id: &str) -> Result<ChannelInfo, ExtractionError> {
|
||||
let downloader = NewPipe::downloader().ok_or(ExtractionError::DownloaderMissing)?;
|
||||
let localization = NewPipe::preferred_localization();
|
||||
let content_country = NewPipe::preferred_content_country();
|
||||
let mut envelope =
|
||||
build_desktop_envelope(&localization, &content_country, &web_client_version());
|
||||
if let Value::Object(ref mut map) = envelope {
|
||||
map.insert("browseId".into(), Value::String(channel_id.into()));
|
||||
}
|
||||
let url = format!("{YOUTUBEI_V1_URL}browse{DISABLE_PRETTY_PRINT_PARAM}");
|
||||
let body = serde_json::to_vec(&envelope).map_err(|e| {
|
||||
ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {e}")))
|
||||
})?;
|
||||
let mut builder = Request::post(&url, body);
|
||||
for (k, v) in youtube_post_headers() {
|
||||
builder = builder.add_header(&k, &v);
|
||||
}
|
||||
let resp = downloader.execute(builder.build())?;
|
||||
if resp.response_code() != 200 {
|
||||
return Err(ExtractionError::Network(NetworkError::Transport(format!(
|
||||
"browse HTTP {}",
|
||||
resp.response_code()
|
||||
))));
|
||||
}
|
||||
let parsed: Value = serde_json::from_str(resp.response_body())
|
||||
.map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))?;
|
||||
Ok(parse_channel_browse(channel_id, &parsed))
|
||||
}
|
||||
|
||||
pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
|
||||
let mut info = ChannelInfo {
|
||||
channel_id: channel_id.to_string(),
|
||||
url: format!("https://www.youtube.com/channel/{channel_id}"),
|
||||
..ChannelInfo::default()
|
||||
};
|
||||
|
||||
// C4_TABBED header flavor is the most common.
|
||||
if let Some(header) = body
|
||||
.get("header")
|
||||
.and_then(|h| h.get("c4TabbedHeaderRenderer"))
|
||||
{
|
||||
if let Some(s) = header.get("title").and_then(|t| t.as_str()) {
|
||||
info.name = s.to_string();
|
||||
}
|
||||
info.avatars = parse_image_set(header.get("avatar"));
|
||||
info.banners = parse_image_set(header.get("banner"));
|
||||
if let Some(text) = header
|
||||
.get("subscriberCountText")
|
||||
.and_then(|s| s.get("simpleText"))
|
||||
.and_then(|s| s.as_str())
|
||||
{
|
||||
info.subscriber_count = parse_subscriber_count(text);
|
||||
}
|
||||
if let Some(badges) = header.get("badges").and_then(|b| b.as_array()) {
|
||||
info.verified = badges.iter().any(|b| {
|
||||
b.get("metadataBadgeRenderer")
|
||||
.and_then(|m| m.get("style"))
|
||||
.and_then(|s| s.as_str())
|
||||
.map(|s| s.starts_with("BADGE_STYLE_TYPE_VERIFIED"))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
}
|
||||
}
|
||||
// Alternative pageHeaderRenderer (newer flavor — 2025+)
|
||||
else if let Some(header) = body
|
||||
.get("header")
|
||||
.and_then(|h| h.get("pageHeaderRenderer"))
|
||||
{
|
||||
if let Some(s) = header.get("pageTitle").and_then(|t| t.as_str()) {
|
||||
info.name = s.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
// microformat / description
|
||||
if let Some(desc) = body
|
||||
.get("metadata")
|
||||
.and_then(|m| m.get("channelMetadataRenderer"))
|
||||
.and_then(|m| m.get("description"))
|
||||
.and_then(|d| d.as_str())
|
||||
{
|
||||
info.description = desc.to_string();
|
||||
}
|
||||
|
||||
// First tab's video grid — recent videos.
|
||||
if let Some(tabs) = body
|
||||
.get("contents")
|
||||
.and_then(|c| c.get("twoColumnBrowseResultsRenderer"))
|
||||
.and_then(|c| c.get("tabs"))
|
||||
.and_then(|t| t.as_array())
|
||||
{
|
||||
for tab in tabs {
|
||||
let Some(tr) = tab.get("tabRenderer") else { continue };
|
||||
if !tr
|
||||
.get("selected")
|
||||
.and_then(|s| s.as_bool())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if let Some(items) = tr
|
||||
.get("content")
|
||||
.and_then(|c| c.get("richGridRenderer"))
|
||||
.and_then(|g| g.get("contents"))
|
||||
.and_then(|c| c.as_array())
|
||||
{
|
||||
for cell in items {
|
||||
if let Some(item) = cell
|
||||
.get("richItemRenderer")
|
||||
.and_then(|r| r.get("content"))
|
||||
.and_then(|c| c.get("videoRenderer"))
|
||||
{
|
||||
if let Some(s) =
|
||||
crate::youtube::search_extractor::test_helpers::video_renderer_to_item(item)
|
||||
{
|
||||
info.recent_videos.push(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info
|
||||
}
|
||||
|
||||
fn parse_image_set(value: Option<&Value>) -> ImageSet {
|
||||
let mut out = Vec::new();
|
||||
if let Some(arr) = value
|
||||
.and_then(|v| v.get("thumbnails"))
|
||||
.and_then(|t| t.as_array())
|
||||
{
|
||||
for t in arr {
|
||||
if let Some(url) = t.get("url").and_then(|v| v.as_str()) {
|
||||
let h = t.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
|
||||
let w = t.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
|
||||
out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Parses a subscriber-count label into a whole number.
///
/// Accepted shapes: `"350 subscribers"`, `"1 subscriber"`,
/// `"1,234 subscribers"`, and abbreviated forms with a `K` / `M` / `B`
/// suffix such as `"1.2K subscribers"` or `"12.5M subscribers"`.
///
/// Returns `-1` when no sensible count can be extracted: empty or
/// non-numeric text, a negative number, or a non-finite value such as
/// `"inf"` / `"NaN"` (both of which `f64::from_str` accepts).
fn parse_subscriber_count(text: &str) -> i64 {
    const SUFFIXES: [(char, f64); 3] = [('K', 1e3), ('M', 1e6), ('B', 1e9)];

    // Remove the plural word first so "subscribers" never leaves a stray "s".
    let stripped = text.replace("subscribers", "").replace("subscriber", "");
    let stripped = stripped.trim();

    let (digits, multiplier) = SUFFIXES
        .iter()
        .find_map(|&(suffix, mult)| {
            stripped
                .strip_suffix(suffix)
                .map(|rest| (rest.trim(), mult))
        })
        .unwrap_or((stripped, 1.0));

    match digits.replace(',', "").parse::<f64>() {
        // Round instead of truncating: 1.005 * 1000.0 evaluates to
        // 1004.9999999999999, which a bare `as i64` would turn into 1004.
        Ok(value) if value.is_finite() && value >= 0.0 => (value * multiplier).round() as i64,
        _ => -1,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Plain, singular and K/M/B-abbreviated labels parse to whole counts.
    #[test]
    fn subscriber_count_parsing() {
        let cases = [
            ("350 subscribers", 350),
            ("1.2K subscribers", 1_200),
            ("12.5M subscribers", 12_500_000),
            ("2B subscribers", 2_000_000_000),
            ("1 subscriber", 1),
        ];
        for (label, expected) in cases {
            assert_eq!(parse_subscriber_count(label), expected);
        }
    }

    /// The tabbed header yields name, subscriber count and verified flag;
    /// the description comes from the metadata renderer.
    #[test]
    fn parses_c4_tabbed_header() {
        let channel_id = "UC_aEa8K-EOJ3D6gOs7HcyNg";
        let response = json!({
            "header":{"c4TabbedHeaderRenderer":{
                "title":"NoCopyrightSounds",
                "subscriberCountText":{"simpleText":"42.5M subscribers"},
                "badges":[{"metadataBadgeRenderer":{"style":"BADGE_STYLE_TYPE_VERIFIED_ARTIST"}}]
            }},
            "metadata":{"channelMetadataRenderer":{"description":"Royalty-free music"}}
        });

        let parsed = parse_channel_browse(channel_id, &response);

        assert_eq!(parsed.name, "NoCopyrightSounds");
        assert_eq!(parsed.description, "Royalty-free music");
        assert_eq!(parsed.subscriber_count, 42_500_000);
        assert!(parsed.verified);
        assert_eq!(parsed.channel_id, channel_id);
    }

    /// Without a tabbed header the name falls back to `pageTitle`.
    #[test]
    fn parses_page_header_renderer_fallback() {
        let response = json!({
            "header":{"pageHeaderRenderer":{"pageTitle":"@SomeChannel"}}
        });
        let parsed = parse_channel_browse("UCxxx", &response);
        assert_eq!(parsed.name, "@SomeChannel");
    }
}
|
||||
112
src/youtube/linkhandler/channel.rs
Normal file
112
src/youtube/linkhandler/channel.rs
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
// YoutubeChannelLinkHandlerFactory — accepts:
|
||||
// * https://www.youtube.com/channel/<channelId> (UC...)
|
||||
// * https://www.youtube.com/@<handle> (handle resolution → channelId)
|
||||
// * https://www.youtube.com/c/<custom-url> (legacy custom URLs)
|
||||
// * https://www.youtube.com/user/<username> (legacy)
|
||||
//
|
||||
// Handles + custom URLs need a live resolve via /youtubei/v1/navigation/resolve_url.
|
||||
// That call lives in youtube/channel.rs (resolve_handle_to_channel_id); here we
|
||||
// just classify the raw URL fragment.
|
||||
|
||||
use url::Url;
|
||||
|
||||
use crate::youtube::linkhandler::{host_is_youtube, LinkError};
|
||||
|
||||
/// How a channel URL names its channel, as classified by `parse`.
///
/// Each variant carries the raw path segment without its prefix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChannelIdentifier {
    /// Segment after `/channel/` — used directly, no resolution step.
    DirectId(String),
    /// Segment after `/@` — must go through resolve_url to get a channel id.
    Handle(String),
    /// Segment after `/c/` (legacy custom URL) — must go through resolve_url.
    Custom(String),
    /// Segment after `/user/` (legacy username) — must go through resolve_url.
    LegacyUser(String),
}
|
||||
|
||||
pub fn parse(url_str: &str) -> Result<ChannelIdentifier, LinkError> {
|
||||
let url = Url::parse(url_str)
|
||||
.map_err(|e| LinkError::InvalidUrl(format!("{url_str}: {e}")))?;
|
||||
let host = url
|
||||
.host_str()
|
||||
.ok_or_else(|| LinkError::InvalidUrl("no host".into()))?;
|
||||
if !host_is_youtube(host) {
|
||||
return Err(LinkError::UnsupportedHost(host.into()));
|
||||
}
|
||||
let path = url.path().trim_end_matches('/');
|
||||
if let Some(rest) = path.strip_prefix("/channel/") {
|
||||
let id = rest.split('/').next().unwrap_or("");
|
||||
if id.is_empty() {
|
||||
return Err(LinkError::MissingId(url_str.into()));
|
||||
}
|
||||
return Ok(ChannelIdentifier::DirectId(id.into()));
|
||||
}
|
||||
if let Some(rest) = path.strip_prefix("/c/") {
|
||||
let s = rest.split('/').next().unwrap_or("");
|
||||
if s.is_empty() {
|
||||
return Err(LinkError::MissingId(url_str.into()));
|
||||
}
|
||||
return Ok(ChannelIdentifier::Custom(s.into()));
|
||||
}
|
||||
if let Some(rest) = path.strip_prefix("/user/") {
|
||||
let s = rest.split('/').next().unwrap_or("");
|
||||
if s.is_empty() {
|
||||
return Err(LinkError::MissingId(url_str.into()));
|
||||
}
|
||||
return Ok(ChannelIdentifier::LegacyUser(s.into()));
|
||||
}
|
||||
if let Some(rest) = path.strip_prefix("/@") {
|
||||
let s = rest.split('/').next().unwrap_or("");
|
||||
if s.is_empty() {
|
||||
return Err(LinkError::MissingId(url_str.into()));
|
||||
}
|
||||
return Ok(ChannelIdentifier::Handle(s.into()));
|
||||
}
|
||||
Err(LinkError::MissingId(url_str.into()))
|
||||
}
|
||||
|
||||
/// Builds the canonical `https://www.youtube.com/channel/<id>` URL.
/// The id is appended verbatim, without escaping.
pub fn channel_url(channel_id: &str) -> String {
    let mut url = String::from("https://www.youtube.com/channel/");
    url.push_str(channel_id);
    url
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `url` and panics if classification fails.
    fn classify(url: &str) -> ChannelIdentifier {
        parse(url).unwrap()
    }

    #[test]
    fn direct_channel_id() {
        assert_eq!(
            classify("https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg"),
            ChannelIdentifier::DirectId("UC_aEa8K-EOJ3D6gOs7HcyNg".into())
        );
    }

    #[test]
    fn handle_form() {
        assert_eq!(
            classify("https://www.youtube.com/@NoCopyrightSounds"),
            ChannelIdentifier::Handle("NoCopyrightSounds".into())
        );
    }

    #[test]
    fn legacy_custom_url() {
        assert_eq!(
            classify("https://www.youtube.com/c/NoCopyrightSounds"),
            ChannelIdentifier::Custom("NoCopyrightSounds".into())
        );
    }

    #[test]
    fn legacy_user() {
        assert_eq!(
            classify("https://www.youtube.com/user/SomeOldChannel"),
            ChannelIdentifier::LegacyUser("SomeOldChannel".into())
        );
    }

    #[test]
    fn rejects_non_youtube() {
        let outcome = parse("https://piped.video/channel/UCxxx");
        assert!(outcome.is_err());
    }

    #[test]
    fn channel_url_builder() {
        let built = channel_url("UC_aEa8K-EOJ3D6gOs7HcyNg");
        assert_eq!(
            built,
            "https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg"
        );
    }
}
|
||||
70
src/youtube/linkhandler/mod.rs
Normal file
70
src/youtube/linkhandler/mod.rs
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
// LinkHandler factories — URL parsing + URL building for YouTube
|
||||
// resource categories. Mirrors NPE
|
||||
// services/youtube/linkHandler/Youtube*LinkHandlerFactory.java.
|
||||
//
|
||||
// PORT SCOPE (per SPEC §6.6): we keep youtube.com / youtube-nocookie.com
|
||||
// / youtu.be / m.youtube.com / music.youtube.com. The 27-host Invidious
|
||||
// mirror list in NPE is dropped — Sulkta isn't an Invidious mirror.
|
||||
|
||||
pub mod channel;
|
||||
pub mod playlist;
|
||||
pub mod search;
|
||||
pub mod stream;
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum LinkError {
|
||||
#[error("invalid url: {0}")]
|
||||
InvalidUrl(String),
|
||||
#[error("unsupported host: {0}")]
|
||||
UnsupportedHost(String),
|
||||
#[error("missing id in url: {0}")]
|
||||
MissingId(String),
|
||||
#[error("malformed id: {0}")]
|
||||
MalformedId(String),
|
||||
}
|
||||
|
||||
/// Allowlist of first-party YouTube hosts. Audit Track D §6.
///
/// Entries are compared with any single leading `www.` removed, so each
/// listed host is accepted both with and without that prefix.
pub const ACCEPTED_HOSTS: &[&str] = &[
    "youtube.com",
    "www.youtube.com",
    "m.youtube.com",
    "music.youtube.com",
    "youtu.be",
    "www.youtube-nocookie.com",
];

/// Returns `true` when `host` is one of `ACCEPTED_HOSTS`.
///
/// The comparison is ASCII case-insensitive and ignores one leading
/// `www.` on both the input and the allowlist entry.
pub fn host_is_youtube(host: &str) -> bool {
    /// Drops a single leading `www.`, if present.
    fn without_www(name: &str) -> &str {
        name.strip_prefix("www.").unwrap_or(name)
    }

    let lowered = host.to_ascii_lowercase();
    let needle = without_www(&lowered);
    ACCEPTED_HOSTS
        .iter()
        .any(|&entry| without_www(entry) == needle)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Every allowlisted host is accepted, regardless of letter case.
    #[test]
    fn accepts_first_party_hosts() {
        let accepted = [
            "www.youtube.com",
            "youtube.com",
            "m.youtube.com",
            "music.youtube.com",
            "youtu.be",
            "WWW.YouTube.COM", // case-insensitive
        ];
        for host in accepted {
            assert!(host_is_youtube(host));
        }
    }

    /// Mirror front-ends and unrelated hosts are refused.
    #[test]
    fn rejects_invidious_and_random() {
        let rejected = ["invidious.io", "yewtu.be", "piped.video", "evil.com"];
        for host in rejected {
            assert!(!host_is_youtube(host));
        }
    }
}
|
||||
81
src/youtube/linkhandler/playlist.rs
Normal file
81
src/youtube/linkhandler/playlist.rs
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
// YoutubePlaylistLinkHandlerFactory — accepts:
|
||||
// * https://www.youtube.com/playlist?list=<PLid>
|
||||
// * https://www.youtube.com/watch?v=...&list=<PLid>
|
||||
// * https://music.youtube.com/playlist?list=<PLid>
|
||||
//
|
||||
// YT playlist IDs prefix:
|
||||
// * PL user-curated playlists
|
||||
// * RD mix / radio
|
||||
// * OLAK5uy_ album / single
|
||||
// * LL liked-videos (private — won't extract anonymously)
|
||||
// * WL watch-later (private)
|
||||
// * UU uploads (auto-generated per channel)
|
||||
|
||||
use url::Url;
|
||||
|
||||
use crate::youtube::linkhandler::{host_is_youtube, LinkError};
|
||||
|
||||
pub fn extract_playlist_id(url_str: &str) -> Result<String, LinkError> {
|
||||
let url = Url::parse(url_str)
|
||||
.map_err(|e| LinkError::InvalidUrl(format!("{url_str}: {e}")))?;
|
||||
let host = url
|
||||
.host_str()
|
||||
.ok_or_else(|| LinkError::InvalidUrl("no host".into()))?;
|
||||
if !host_is_youtube(host) {
|
||||
return Err(LinkError::UnsupportedHost(host.into()));
|
||||
}
|
||||
url.query_pairs()
|
||||
.find(|(k, _)| k == "list")
|
||||
.map(|(_, v)| v.into_owned())
|
||||
.filter(|s| !s.is_empty())
|
||||
.ok_or_else(|| LinkError::MissingId(url_str.into()))
|
||||
}
|
||||
|
||||
/// Builds the canonical `https://www.youtube.com/playlist?list=<id>` URL.
/// The id is appended verbatim, without escaping.
pub fn playlist_url(playlist_id: &str) -> String {
    ["https://www.youtube.com/playlist?list=", playlist_id].concat()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn standalone_playlist() {
        let url = "https://www.youtube.com/playlist?list=PLMC9KNkIncKtPzgY-5rmhvj7fax8fdxoj";
        assert_eq!(
            extract_playlist_id(url).unwrap(),
            "PLMC9KNkIncKtPzgY-5rmhvj7fax8fdxoj"
        );
    }

    #[test]
    fn watch_with_list() {
        let url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLxxx";
        assert_eq!(extract_playlist_id(url).unwrap(), "PLxxx");
    }

    #[test]
    fn music_subdomain() {
        let url = "https://music.youtube.com/playlist?list=OLAK5uy_kFooBar";
        assert_eq!(extract_playlist_id(url).unwrap(), "OLAK5uy_kFooBar");
    }

    #[test]
    fn rejects_no_list_param() {
        let outcome = extract_playlist_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ");
        assert!(matches!(outcome.unwrap_err(), LinkError::MissingId(_)));
    }

    #[test]
    fn rejects_non_youtube_host() {
        let outcome = extract_playlist_id("https://invidious.io/playlist?list=PLxxx");
        assert!(matches!(outcome.unwrap_err(), LinkError::UnsupportedHost(_)));
    }
}
|
||||
97
src/youtube/linkhandler/search.rs
Normal file
97
src/youtube/linkhandler/search.rs
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
// YoutubeSearchQueryHandlerFactory + search filters. Mirrors NPE
|
||||
// YoutubeSearchQueryHandlerFactory.java + the filter params in
|
||||
// YoutubeSearchExtractor.java.
|
||||
//
|
||||
// Filter params are opaque base64 protobufs — NPE doesn't decode them,
|
||||
// just sends the magic strings. We mirror that. See audit Track D §3.
|
||||
|
||||
use url::form_urlencoded;
|
||||
|
||||
/// Result-type filter applied to a search request.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SearchFilter {
    /// No filtering; the request carries no `params` field.
    All,
    /// Restrict results to videos.
    Videos,
    /// Restrict results to channels.
    Channels,
    /// Restrict results to playlists.
    Playlists,
    /// Music "songs" — goes through the WEB_REMIX path on music.youtube.com.
    MusicSongs,
    /// Music "videos" — WEB_REMIX as well.
    MusicVideos,
    /// Music "albums".
    MusicAlbums,
    /// Music "playlists".
    MusicPlaylists,
    /// Music "artists".
    MusicArtists,
}

impl SearchFilter {
    /// Opaque InnerTube `params` string for this filter, or `None` when the
    /// field must be left out of the request (the `All` filter).
    pub fn params(&self) -> Option<&'static str> {
        let encoded = match *self {
            SearchFilter::All => return None,
            SearchFilter::Videos => "EgIQAfABAQ%3D%3D",
            SearchFilter::Channels => "EgIQAvABAQ%3D%3D",
            SearchFilter::Playlists => "EgIQA_ABAQ%3D%3D",
            SearchFilter::MusicSongs => "EgWKAQIIAWoMEA4QChADEAQQCRAF",
            SearchFilter::MusicVideos => "EgWKAQIQAWoMEA4QChADEAQQCRAF",
            SearchFilter::MusicAlbums => "EgWKAQIYAWoMEA4QChADEAQQCRAF",
            SearchFilter::MusicPlaylists => "EgeKAQQoAEABagwQDhAKEAMQBBAJEAU%3D",
            SearchFilter::MusicArtists => "EgWKAQIgAWoMEA4QChADEAQQCRAF",
        };
        Some(encoded)
    }

    /// `true` for the five music filters, which are routed to the music
    /// endpoint; `false` for `All`, `Videos`, `Channels` and `Playlists`.
    pub fn uses_music_endpoint(&self) -> bool {
        match *self {
            SearchFilter::All
            | SearchFilter::Videos
            | SearchFilter::Channels
            | SearchFilter::Playlists => false,
            SearchFilter::MusicSongs
            | SearchFilter::MusicVideos
            | SearchFilter::MusicAlbums
            | SearchFilter::MusicPlaylists
            | SearchFilter::MusicArtists => true,
        }
    }
}
|
||||
|
||||
pub fn search_url(query: &str) -> String {
|
||||
let encoded: String = form_urlencoded::Serializer::new(String::new())
|
||||
.append_pair("search_query", query)
|
||||
.finish();
|
||||
format!("https://www.youtube.com/results?{encoded}")
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn all_filter_omits_params() {
        let params = SearchFilter::All.params();
        assert!(params.is_none());
    }

    #[test]
    fn typed_filters_have_params() {
        let typed = [
            SearchFilter::Videos,
            SearchFilter::Channels,
            SearchFilter::Playlists,
        ];
        for filter in typed {
            assert!(filter.params().is_some());
        }
    }

    #[test]
    fn music_filters_route_to_music_endpoint() {
        assert!(SearchFilter::MusicSongs.uses_music_endpoint());
        assert!(!SearchFilter::Videos.uses_music_endpoint());
    }

    #[test]
    fn search_url_encodes_query() {
        let built = search_url("rust + ferris");
        assert_eq!(
            built,
            "https://www.youtube.com/results?search_query=rust+%2B+ferris"
        );
    }
}
|
||||
168
src/youtube/linkhandler/stream.rs
Normal file
168
src/youtube/linkhandler/stream.rs
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
// YoutubeStreamLinkHandlerFactory — accepts:
|
||||
// * https://www.youtube.com/watch?v=<11-char-id>
|
||||
// * https://m.youtube.com/watch?v=...
|
||||
// * https://music.youtube.com/watch?v=...
|
||||
// * https://youtu.be/<id>
|
||||
// * https://www.youtube.com/embed/<id>
|
||||
// * https://www.youtube.com/shorts/<id>
|
||||
// * https://www.youtube.com/v/<id> (legacy)
|
||||
// * https://www.youtube-nocookie.com/embed/<id>
|
||||
// * attribution_link?u=<encoded-watch-url>
|
||||
//
|
||||
// Plus any of the above with `&t=<seconds>` for timestamp.
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use url::Url;
|
||||
|
||||
use crate::youtube::linkhandler::{host_is_youtube, LinkError};
|
||||
|
||||
const VIDEO_ID_LEN: usize = 11;
|
||||
|
||||
static VIDEO_ID_RE: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap());
|
||||
|
||||
pub fn is_valid_video_id(id: &str) -> bool {
|
||||
id.len() == VIDEO_ID_LEN && VIDEO_ID_RE.is_match(id)
|
||||
}
|
||||
|
||||
/// Extracts the 11-char video ID from a YouTube URL. Returns None when
|
||||
/// the URL doesn't look like a YT video URL (so search results / channel
|
||||
/// pages return None rather than Err — caller decides).
|
||||
pub fn extract_video_id(input_url: &str) -> Result<String, LinkError> {
|
||||
let url = Url::parse(input_url)
|
||||
.map_err(|e| LinkError::InvalidUrl(format!("{input_url}: {e}")))?;
|
||||
let host = url
|
||||
.host_str()
|
||||
.ok_or_else(|| LinkError::InvalidUrl("no host".into()))?;
|
||||
if !host_is_youtube(host) {
|
||||
return Err(LinkError::UnsupportedHost(host.into()));
|
||||
}
|
||||
|
||||
let host_lc = host.to_ascii_lowercase();
|
||||
let path = url.path();
|
||||
let mut candidate: Option<String> = None;
|
||||
|
||||
// youtu.be/<id>
|
||||
if host_lc.ends_with("youtu.be") {
|
||||
if let Some(rest) = path.strip_prefix('/') {
|
||||
candidate = Some(rest.split('/').next().unwrap_or("").to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// /embed/<id>, /shorts/<id>, /v/<id>, /live/<id>
|
||||
for prefix in ["/embed/", "/shorts/", "/v/", "/live/"] {
|
||||
if let Some(rest) = path.strip_prefix(prefix) {
|
||||
candidate = Some(rest.split('/').next().unwrap_or("").to_string());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// /watch?v=<id>
|
||||
if candidate.is_none() && (path == "/watch" || path == "/watch/") {
|
||||
candidate = url
|
||||
.query_pairs()
|
||||
.find(|(k, _)| k == "v")
|
||||
.map(|(_, v)| v.into_owned());
|
||||
}
|
||||
|
||||
// /attribution_link?u=<encoded watch url>
|
||||
if candidate.is_none() && path.starts_with("/attribution_link") {
|
||||
if let Some((_, u_param)) = url.query_pairs().find(|(k, _)| k == "u") {
|
||||
// Recurse on the decoded URL — but only one level deep.
|
||||
let inner = format!("https://www.youtube.com{u_param}");
|
||||
return extract_video_id(&inner);
|
||||
}
|
||||
}
|
||||
|
||||
let id = candidate
|
||||
.ok_or_else(|| LinkError::MissingId(input_url.into()))?;
|
||||
if !is_valid_video_id(&id) {
|
||||
return Err(LinkError::MalformedId(id));
|
||||
}
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Builds the canonical `https://www.youtube.com/watch?v=<id>` URL.
/// The id is appended verbatim, without escaping or validation.
pub fn watch_url(video_id: &str) -> String {
    const BASE: &str = "https://www.youtube.com/watch?v=";
    let mut url = String::with_capacity(BASE.len() + video_id.len());
    url.push_str(BASE);
    url.push_str(video_id);
    url
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Id shared by most fixtures below.
    const SAMPLE_ID: &str = "n4tK7LYFxI0";

    /// Extracts the id from `url`, panicking on any error.
    fn id_of(url: &str) -> String {
        extract_video_id(url).unwrap()
    }

    #[test]
    fn watch_full_url() {
        assert_eq!(id_of("https://www.youtube.com/watch?v=n4tK7LYFxI0"), SAMPLE_ID);
    }

    #[test]
    fn watch_with_extra_params() {
        assert_eq!(
            id_of("https://www.youtube.com/watch?v=n4tK7LYFxI0&t=42s&list=foo"),
            SAMPLE_ID
        );
    }

    #[test]
    fn youtu_be_short() {
        assert_eq!(id_of("https://youtu.be/dQw4w9WgXcQ"), "dQw4w9WgXcQ");
    }

    #[test]
    fn youtu_be_short_with_query() {
        assert_eq!(id_of("https://youtu.be/dQw4w9WgXcQ?t=10"), "dQw4w9WgXcQ");
    }

    #[test]
    fn embed_form() {
        let embeds = [
            "https://www.youtube.com/embed/n4tK7LYFxI0",
            "https://www.youtube-nocookie.com/embed/n4tK7LYFxI0",
        ];
        for url in embeds {
            assert_eq!(id_of(url), SAMPLE_ID);
        }
    }

    #[test]
    fn shorts_form() {
        assert_eq!(id_of("https://www.youtube.com/shorts/n4tK7LYFxI0"), SAMPLE_ID);
    }

    #[test]
    fn music_youtube() {
        assert_eq!(id_of("https://music.youtube.com/watch?v=n4tK7LYFxI0"), SAMPLE_ID);
    }

    #[test]
    fn rejects_invidious_host() {
        let outcome = extract_video_id("https://yewtu.be/watch?v=n4tK7LYFxI0");
        assert!(matches!(outcome.unwrap_err(), LinkError::UnsupportedHost(_)));
    }

    #[test]
    fn rejects_invalid_id_shape() {
        let outcome = extract_video_id("https://www.youtube.com/watch?v=tooshort");
        assert!(matches!(outcome.unwrap_err(), LinkError::MalformedId(_)));
    }

    #[test]
    fn accepts_only_first_path_segment() {
        assert_eq!(id_of("https://youtu.be/n4tK7LYFxI0/extra"), SAMPLE_ID);
    }

    #[test]
    fn watch_url_builder() {
        let built = watch_url(SAMPLE_ID);
        assert_eq!(built, "https://www.youtube.com/watch?v=n4tK7LYFxI0");
    }
}
|
||||
|
|
@ -3,12 +3,17 @@
|
|||
// itag table. Phase 4+ will add the stream extractor, search, channel,
|
||||
// playlist, kiosks.
|
||||
|
||||
pub mod channel;
|
||||
pub mod client_request;
|
||||
pub mod constants;
|
||||
pub mod itag;
|
||||
pub mod js;
|
||||
pub mod linkhandler;
|
||||
pub mod parsing;
|
||||
pub mod playlist_extractor;
|
||||
pub mod potoken;
|
||||
pub mod search_extractor;
|
||||
pub mod stream_extractor;
|
||||
pub mod stream_helper;
|
||||
pub mod suggestion_extractor;
|
||||
|
||||
|
|
|
|||
297
src/youtube/playlist_extractor.rs
Normal file
297
src/youtube/playlist_extractor.rs
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
// YoutubePlaylistExtractor — mirrors NPE
|
||||
// services/youtube/extractors/YoutubePlaylistExtractor.java.
|
||||
//
|
||||
// 2-POST pattern (audit Track D §7):
|
||||
// 1. browseId="VL<playlistId>" → playlist metadata + first batch
|
||||
// 2. continuation token → subsequent batches
|
||||
//
|
||||
// Body shape per call: build_desktop_envelope + add browseId (or
|
||||
// continuation). Response walked to playlistVideoListRenderer.contents[]
|
||||
// .playlistVideoRenderer.
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::downloader::request::Request;
|
||||
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
|
||||
use crate::image::ImageSet;
|
||||
use crate::newpipe::NewPipe;
|
||||
use crate::stream::StreamInfoItem;
|
||||
use crate::youtube::client_request::build_desktop_envelope;
|
||||
use crate::youtube::constants::*;
|
||||
use crate::youtube::parsing::{web_client_version, youtube_post_headers};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct PlaylistInfo {
|
||||
pub playlist_id: String,
|
||||
pub url: String,
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
pub uploader_name: String,
|
||||
pub uploader_url: String,
|
||||
pub uploader_id: String,
|
||||
pub thumbnails: ImageSet,
|
||||
pub video_count: i64,
|
||||
pub videos: Vec<StreamInfoItem>,
|
||||
pub continuation_token: Option<String>,
|
||||
}
|
||||
|
||||
pub fn playlist_info(playlist_id: &str) -> Result<PlaylistInfo, ExtractionError> {
|
||||
let downloader = NewPipe::downloader().ok_or(ExtractionError::DownloaderMissing)?;
|
||||
let localization = NewPipe::preferred_localization();
|
||||
let content_country = NewPipe::preferred_content_country();
|
||||
|
||||
let mut envelope =
|
||||
build_desktop_envelope(&localization, &content_country, &web_client_version());
|
||||
if let Value::Object(ref mut map) = envelope {
|
||||
map.insert(
|
||||
"browseId".into(),
|
||||
Value::String(format!("VL{playlist_id}")),
|
||||
);
|
||||
}
|
||||
let url = format!("{YOUTUBEI_V1_URL}browse{DISABLE_PRETTY_PRINT_PARAM}");
|
||||
let body = serde_json::to_vec(&envelope).map_err(|e| {
|
||||
ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {e}")))
|
||||
})?;
|
||||
let mut builder = Request::post(&url, body);
|
||||
for (k, v) in youtube_post_headers() {
|
||||
builder = builder.add_header(&k, &v);
|
||||
}
|
||||
let resp = downloader.execute(builder.build())?;
|
||||
if resp.response_code() != 200 {
|
||||
return Err(ExtractionError::Network(NetworkError::Transport(format!(
|
||||
"browse HTTP {}",
|
||||
resp.response_code()
|
||||
))));
|
||||
}
|
||||
let parsed: Value = serde_json::from_str(resp.response_body())
|
||||
.map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))?;
|
||||
Ok(parse_playlist_browse(playlist_id, &parsed))
|
||||
}
|
||||
|
||||
pub fn parse_playlist_browse(playlist_id: &str, body: &Value) -> PlaylistInfo {
|
||||
let mut info = PlaylistInfo {
|
||||
playlist_id: playlist_id.into(),
|
||||
url: format!("https://www.youtube.com/playlist?list={playlist_id}"),
|
||||
..PlaylistInfo::default()
|
||||
};
|
||||
|
||||
// metadata.playlistMetadataRenderer.title / description
|
||||
if let Some(meta) = body
|
||||
.get("metadata")
|
||||
.and_then(|m| m.get("playlistMetadataRenderer"))
|
||||
{
|
||||
if let Some(s) = meta.get("title").and_then(|v| v.as_str()) {
|
||||
info.name = s.into();
|
||||
}
|
||||
if let Some(s) = meta.get("description").and_then(|v| v.as_str()) {
|
||||
info.description = s.into();
|
||||
}
|
||||
}
|
||||
|
||||
// sidebar.playlistSidebarRenderer.items[].playlistSidebarPrimaryInfoRenderer
|
||||
// + playlistSidebarSecondaryInfoRenderer
|
||||
if let Some(items) = body
|
||||
.get("sidebar")
|
||||
.and_then(|s| s.get("playlistSidebarRenderer"))
|
||||
.and_then(|s| s.get("items"))
|
||||
.and_then(|i| i.as_array())
|
||||
{
|
||||
for item in items {
|
||||
if let Some(primary) = item.get("playlistSidebarPrimaryInfoRenderer") {
|
||||
if info.name.is_empty() {
|
||||
if let Some(s) = primary
|
||||
.get("title")
|
||||
.and_then(|t| t.get("runs"))
|
||||
.and_then(|r| r.as_array())
|
||||
.and_then(|a| a.first())
|
||||
.and_then(|r| r.get("text"))
|
||||
.and_then(|t| t.as_str())
|
||||
{
|
||||
info.name = s.into();
|
||||
}
|
||||
}
|
||||
// stats[1] (video count) — "1,234 videos"
|
||||
if let Some(stats) = primary.get("stats").and_then(|s| s.as_array()) {
|
||||
if let Some(count_text) = stats
|
||||
.get(0)
|
||||
.and_then(|s| s.get("runs"))
|
||||
.and_then(|r| r.as_array())
|
||||
.and_then(|a| a.first())
|
||||
.and_then(|r| r.get("text"))
|
||||
.and_then(|t| t.as_str())
|
||||
{
|
||||
info.video_count = count_text
|
||||
.replace(',', "")
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(secondary) = item.get("playlistSidebarSecondaryInfoRenderer") {
|
||||
if let Some(owner) = secondary.get("videoOwner").and_then(|o| {
|
||||
o.get("videoOwnerRenderer")
|
||||
}) {
|
||||
if let Some(s) = owner
|
||||
.get("title")
|
||||
.and_then(|t| t.get("runs"))
|
||||
.and_then(|r| r.as_array())
|
||||
.and_then(|a| a.first())
|
||||
{
|
||||
if let Some(name) = s.get("text").and_then(|t| t.as_str()) {
|
||||
info.uploader_name = name.into();
|
||||
}
|
||||
if let Some(endpoint) = s.get("navigationEndpoint") {
|
||||
if let Some(browse_id) = endpoint
|
||||
.get("browseEndpoint")
|
||||
.and_then(|b| b.get("browseId"))
|
||||
.and_then(|i| i.as_str())
|
||||
{
|
||||
info.uploader_id = browse_id.into();
|
||||
info.uploader_url =
|
||||
format!("https://www.youtube.com/channel/{browse_id}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// contents.twoColumnBrowseResultsRenderer.tabs[0].tabRenderer.content
|
||||
// .sectionListRenderer.contents[0].itemSectionRenderer.contents[0]
|
||||
// .playlistVideoListRenderer.contents[]
|
||||
let list_contents = body
|
||||
.get("contents")
|
||||
.and_then(|c| c.get("twoColumnBrowseResultsRenderer"))
|
||||
.and_then(|c| c.get("tabs"))
|
||||
.and_then(|t| t.as_array())
|
||||
.and_then(|tabs| tabs.first())
|
||||
.and_then(|t| t.get("tabRenderer"))
|
||||
.and_then(|t| t.get("content"))
|
||||
.and_then(|c| c.get("sectionListRenderer"))
|
||||
.and_then(|s| s.get("contents"))
|
||||
.and_then(|c| c.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|s| s.get("itemSectionRenderer"))
|
||||
.and_then(|i| i.get("contents"))
|
||||
.and_then(|c| c.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|s| s.get("playlistVideoListRenderer"))
|
||||
.and_then(|p| p.get("contents"))
|
||||
.and_then(|c| c.as_array());
|
||||
|
||||
if let Some(arr) = list_contents {
|
||||
for item in arr {
|
||||
if let Some(v) = item.get("playlistVideoRenderer") {
|
||||
if let Some(s) = parse_playlist_video_renderer(v) {
|
||||
info.videos.push(s);
|
||||
}
|
||||
} else if let Some(c) = item.get("continuationItemRenderer") {
|
||||
info.continuation_token = c
|
||||
.get("continuationEndpoint")
|
||||
.and_then(|e| e.get("continuationCommand"))
|
||||
.and_then(|c| c.get("token"))
|
||||
.and_then(|t| t.as_str())
|
||||
.map(String::from);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info
|
||||
}
|
||||
|
||||
fn parse_playlist_video_renderer(renderer: &Value) -> Option<StreamInfoItem> {
|
||||
let video_id = renderer.get("videoId")?.as_str()?.to_string();
|
||||
let title = renderer
|
||||
.get("title")
|
||||
.and_then(|t| t.get("runs"))
|
||||
.and_then(|r| r.as_array())
|
||||
.and_then(|a| a.first())
|
||||
.and_then(|r| r.get("text"))
|
||||
.and_then(|t| t.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let uploader_name = renderer
|
||||
.get("shortBylineText")
|
||||
.and_then(|s| s.get("runs"))
|
||||
.and_then(|r| r.as_array())
|
||||
.and_then(|a| a.first())
|
||||
.and_then(|r| r.get("text"))
|
||||
.and_then(|t| t.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let duration_seconds = renderer
|
||||
.get("lengthSeconds")
|
||||
.and_then(|s| s.as_str())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
Some(StreamInfoItem {
|
||||
service_id: 0,
|
||||
url: format!("https://www.youtube.com/watch?v={video_id}"),
|
||||
name: title,
|
||||
thumbnails: Vec::new(),
|
||||
uploader_name,
|
||||
uploader_url: String::new(),
|
||||
uploader_id: String::new(),
|
||||
uploader_verified: false,
|
||||
duration_seconds,
|
||||
view_count: -1,
|
||||
upload_date_relative: String::new(),
|
||||
stream_type: Some(crate::stream::StreamType::VideoStream),
|
||||
short_description: String::new(),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Nests `rows` under the renderer chain that leads to the playlist video list.
    fn body_with_rows(rows: Value) -> Value {
        json!({
            "contents": {"twoColumnBrowseResultsRenderer": {"tabs": [{
                "tabRenderer": {"content": {"sectionListRenderer": {"contents": [{
                    "itemSectionRenderer": {"contents": [{
                        "playlistVideoListRenderer": {"contents": rows}
                    }]}
                }]}}}
            }]}}
        })
    }

    #[test]
    fn parses_basic_playlist_meta() {
        let meta = json!({
            "title": "Coding music",
            "description": "For long sessions."
        });
        let body = json!({"metadata": {"playlistMetadataRenderer": meta}});

        let info = parse_playlist_browse("PLxxx", &body);

        assert_eq!(info.name, "Coding music");
        assert_eq!(info.description, "For long sessions.");
        assert_eq!(info.playlist_id, "PLxxx");
        assert_eq!(info.url, "https://www.youtube.com/playlist?list=PLxxx");
    }

    #[test]
    fn parses_video_list_and_continuation() {
        let video_row = json!({"playlistVideoRenderer": {
            "videoId": "abc",
            "title": {"runs": [{"text": "First track"}]},
            "shortBylineText": {"runs": [{"text": "NCS"}]},
            "lengthSeconds": "234"
        }});
        let continuation_row = json!({"continuationItemRenderer": {
            "continuationEndpoint": {"continuationCommand": {
                "token": "OPAQUE_CONT_TOKEN"
            }}
        }});
        let body = body_with_rows(json!([video_row, continuation_row]));

        let info = parse_playlist_browse("PLxxx", &body);

        assert_eq!(info.videos.len(), 1);
        let first = &info.videos[0];
        assert_eq!(first.name, "First track");
        assert_eq!(first.uploader_name, "NCS");
        assert_eq!(first.duration_seconds, 234);
        assert_eq!(info.continuation_token.as_deref(), Some("OPAQUE_CONT_TOKEN"));
    }
}
|
||||
448
src/youtube/search_extractor.rs
Normal file
448
src/youtube/search_extractor.rs
Normal file
|
|
@ -0,0 +1,448 @@
|
|||
// YoutubeSearchExtractor — mirrors NPE
|
||||
// services/youtube/extractors/YoutubeSearchExtractor.java.
|
||||
//
|
||||
// Calls /youtubei/v1/search with the WEB client (via desktop fast-path
|
||||
// envelope). Body shape per audit Track D §3:
|
||||
// {
|
||||
// "context": { "client": { ... } },
|
||||
// "query": "<query>",
|
||||
// "params": "<filter base64>" // omitted for All
|
||||
// }
|
||||
//
|
||||
// Response walked:
|
||||
// contents.twoColumnSearchResultsRenderer.primaryContents
|
||||
// .sectionListRenderer.contents[]
|
||||
// .itemSectionRenderer.contents[]
|
||||
// → videoRenderer | channelRenderer | playlistRenderer | shelfRenderer
|
||||
//
|
||||
// `shelfRenderer` is a sub-section (e.g. "People also watched") whose
|
||||
// `content.verticalListRenderer.items[]` are the same renderer types.
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::downloader::request::Request;
|
||||
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
|
||||
use crate::image::{Image, ResolutionLevel};
|
||||
use crate::newpipe::NewPipe;
|
||||
use crate::stream::{StreamInfoItem, StreamType};
|
||||
use crate::youtube::client_request::build_desktop_envelope;
|
||||
use crate::youtube::constants::*;
|
||||
use crate::youtube::linkhandler::search::SearchFilter;
|
||||
use crate::youtube::parsing::{web_client_version, youtube_post_headers};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct SearchInfo {
|
||||
pub query: String,
|
||||
pub corrected_query: Option<String>,
|
||||
pub videos: Vec<StreamInfoItem>,
|
||||
pub continuation_token: Option<String>,
|
||||
}
|
||||
|
||||
pub fn search(query: &str, filter: SearchFilter) -> Result<SearchInfo, ExtractionError> {
|
||||
if filter.uses_music_endpoint() {
|
||||
return Err(ExtractionError::Other(
|
||||
"music search filters route to WEB_REMIX — not implemented in this phase".into(),
|
||||
));
|
||||
}
|
||||
let downloader = NewPipe::downloader().ok_or(ExtractionError::DownloaderMissing)?;
|
||||
let localization = NewPipe::preferred_localization();
|
||||
let content_country = NewPipe::preferred_content_country();
|
||||
|
||||
let mut envelope = build_desktop_envelope(&localization, &content_country, &web_client_version());
|
||||
if let Value::Object(ref mut map) = envelope {
|
||||
map.insert("query".into(), Value::String(query.into()));
|
||||
if let Some(params) = filter.params() {
|
||||
map.insert("params".into(), Value::String(params.into()));
|
||||
}
|
||||
}
|
||||
|
||||
let url = format!("{YOUTUBEI_V1_URL}search{DISABLE_PRETTY_PRINT_PARAM}");
|
||||
let body = serde_json::to_vec(&envelope).map_err(|e| {
|
||||
ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {e}")))
|
||||
})?;
|
||||
let mut builder = Request::post(&url, body);
|
||||
for (k, v) in youtube_post_headers() {
|
||||
builder = builder.add_header(&k, &v);
|
||||
}
|
||||
let resp = downloader.execute(builder.build())?;
|
||||
if resp.response_code() != 200 {
|
||||
return Err(ExtractionError::Network(NetworkError::Transport(format!(
|
||||
"search HTTP {}",
|
||||
resp.response_code()
|
||||
))));
|
||||
}
|
||||
let parsed: Value = serde_json::from_str(resp.response_body())
|
||||
.map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))?;
|
||||
Ok(parse_search_response(query, &parsed))
|
||||
}
|
||||
|
||||
pub fn parse_search_response(query: &str, body: &Value) -> SearchInfo {
|
||||
let mut info = SearchInfo {
|
||||
query: query.to_string(),
|
||||
..SearchInfo::default()
|
||||
};
|
||||
|
||||
let primary = body
|
||||
.get("contents")
|
||||
.and_then(|c| c.get("twoColumnSearchResultsRenderer"))
|
||||
.and_then(|c| c.get("primaryContents"))
|
||||
.and_then(|c| c.get("sectionListRenderer"))
|
||||
.and_then(|c| c.get("contents"));
|
||||
|
||||
if let Some(sections) = primary.and_then(|v| v.as_array()) {
|
||||
for section in sections {
|
||||
if let Some(items) = section
|
||||
.get("itemSectionRenderer")
|
||||
.and_then(|s| s.get("contents"))
|
||||
.and_then(|c| c.as_array())
|
||||
{
|
||||
for item in items {
|
||||
extract_item_into(item, &mut info);
|
||||
}
|
||||
}
|
||||
if let Some(ct) = section
|
||||
.get("continuationItemRenderer")
|
||||
.and_then(|s| s.get("continuationEndpoint"))
|
||||
.and_then(|c| c.get("continuationCommand"))
|
||||
.and_then(|c| c.get("token"))
|
||||
.and_then(|t| t.as_str())
|
||||
{
|
||||
info.continuation_token = Some(ct.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(corrected) = body
|
||||
.get("contents")
|
||||
.and_then(|c| c.get("twoColumnSearchResultsRenderer"))
|
||||
.and_then(|c| c.get("primaryContents"))
|
||||
.and_then(|c| c.get("sectionListRenderer"))
|
||||
.and_then(|c| c.get("contents"))
|
||||
.and_then(|c| c.as_array())
|
||||
.and_then(|arr| {
|
||||
arr.iter().find_map(|s| {
|
||||
s.get("showingResultsForRenderer")
|
||||
.and_then(|r| r.get("correctedQuery"))
|
||||
.and_then(|q| q.get("runs"))
|
||||
.and_then(|r| r.as_array())
|
||||
.and_then(|a| a.first())
|
||||
.and_then(|r| r.get("text"))
|
||||
.and_then(|t| t.as_str())
|
||||
})
|
||||
})
|
||||
{
|
||||
info.corrected_query = Some(corrected.to_string());
|
||||
}
|
||||
info
|
||||
}
|
||||
|
||||
fn extract_item_into(item: &Value, info: &mut SearchInfo) {
|
||||
if let Some(video) = item.get("videoRenderer") {
|
||||
if let Some(s) = parse_video_renderer(video) {
|
||||
info.videos.push(s);
|
||||
}
|
||||
} else if let Some(shelf) = item.get("shelfRenderer") {
|
||||
if let Some(items) = shelf
|
||||
.get("content")
|
||||
.and_then(|c| c.get("verticalListRenderer"))
|
||||
.and_then(|v| v.get("items"))
|
||||
.and_then(|i| i.as_array())
|
||||
{
|
||||
for inner in items {
|
||||
extract_item_into(inner, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
// channelRenderer and playlistRenderer parsing is intentionally
|
||||
// omitted from Phase 6a — landed in Phase 6b along with channel/
|
||||
// playlist extractors.
|
||||
}
|
||||
|
||||
pub(crate) mod test_helpers {
|
||||
use super::*;
|
||||
pub fn video_renderer_to_item(renderer: &Value) -> Option<StreamInfoItem> {
|
||||
super::parse_video_renderer(renderer)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_video_renderer(renderer: &Value) -> Option<StreamInfoItem> {
|
||||
let video_id = renderer.get("videoId")?.as_str()?.to_string();
|
||||
let title = runs_text(renderer.get("title"));
|
||||
let uploader_name = runs_text(renderer.get("ownerText"))
|
||||
.or_else(|| runs_text(renderer.get("longBylineText")))
|
||||
.unwrap_or_default();
|
||||
let uploader_endpoint = renderer
|
||||
.get("ownerText")
|
||||
.and_then(|o| o.get("runs"))
|
||||
.and_then(|r| r.as_array())
|
||||
.and_then(|a| a.first())
|
||||
.and_then(|r| r.get("navigationEndpoint"));
|
||||
let uploader_url = uploader_endpoint
|
||||
.and_then(|e| e.get("commandMetadata"))
|
||||
.and_then(|m| m.get("webCommandMetadata"))
|
||||
.and_then(|w| w.get("url"))
|
||||
.and_then(|u| u.as_str())
|
||||
.map(|p| format!("https://www.youtube.com{p}"))
|
||||
.unwrap_or_default();
|
||||
let uploader_id = uploader_endpoint
|
||||
.and_then(|e| e.get("browseEndpoint"))
|
||||
.and_then(|b| b.get("browseId"))
|
||||
.and_then(|i| i.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let duration_seconds = renderer
|
||||
.get("lengthText")
|
||||
.and_then(|l| l.get("simpleText"))
|
||||
.and_then(|s| s.as_str())
|
||||
.map(parse_duration_string)
|
||||
.unwrap_or(0);
|
||||
let view_count = renderer
|
||||
.get("viewCountText")
|
||||
.and_then(|c| c.get("simpleText"))
|
||||
.and_then(|s| s.as_str())
|
||||
.or_else(|| {
|
||||
renderer
|
||||
.get("shortViewCountText")
|
||||
.and_then(|c| c.get("simpleText"))
|
||||
.and_then(|s| s.as_str())
|
||||
})
|
||||
.map(parse_view_count)
|
||||
.unwrap_or(-1);
|
||||
let upload_relative = renderer
|
||||
.get("publishedTimeText")
|
||||
.and_then(|p| p.get("simpleText"))
|
||||
.and_then(|s| s.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let stream_type = if renderer
|
||||
.get("badges")
|
||||
.and_then(|b| b.as_array())
|
||||
.map(|arr| {
|
||||
arr.iter().any(|b| {
|
||||
b.get("metadataBadgeRenderer")
|
||||
.and_then(|m| m.get("label"))
|
||||
.and_then(|l| l.as_str())
|
||||
.map(|s| s.eq_ignore_ascii_case("live"))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
})
|
||||
.unwrap_or(false)
|
||||
{
|
||||
StreamType::VideoLiveStream
|
||||
} else {
|
||||
StreamType::VideoStream
|
||||
};
|
||||
|
||||
let short_description = runs_text(renderer.get("detailedMetadataSnippets"))
|
||||
.or_else(|| runs_text(renderer.get("descriptionSnippet")))
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut thumbnails = Vec::new();
|
||||
if let Some(arr) = renderer
|
||||
.get("thumbnail")
|
||||
.and_then(|t| t.get("thumbnails"))
|
||||
.and_then(|t| t.as_array())
|
||||
{
|
||||
for t in arr {
|
||||
if let Some(url) = t.get("url").and_then(|v| v.as_str()) {
|
||||
let h = t.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
|
||||
let w = t.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
|
||||
thumbnails.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let uploader_verified = renderer
|
||||
.get("ownerBadges")
|
||||
.and_then(|b| b.as_array())
|
||||
.map(|arr| {
|
||||
arr.iter().any(|b| {
|
||||
b.get("metadataBadgeRenderer")
|
||||
.and_then(|m| m.get("style"))
|
||||
.and_then(|s| s.as_str())
|
||||
.map(|s| s == "BADGE_STYLE_TYPE_VERIFIED" || s == "BADGE_STYLE_TYPE_VERIFIED_ARTIST")
|
||||
.unwrap_or(false)
|
||||
})
|
||||
})
|
||||
.unwrap_or(false);
|
||||
|
||||
Some(StreamInfoItem {
|
||||
service_id: 0,
|
||||
url: format!("https://www.youtube.com/watch?v={video_id}"),
|
||||
name: title.unwrap_or_default(),
|
||||
thumbnails,
|
||||
uploader_name,
|
||||
uploader_url,
|
||||
uploader_id,
|
||||
uploader_verified,
|
||||
duration_seconds,
|
||||
view_count,
|
||||
upload_date_relative: upload_relative,
|
||||
stream_type: Some(stream_type),
|
||||
short_description,
|
||||
})
|
||||
}
|
||||
|
||||
fn runs_text(value: Option<&Value>) -> Option<String> {
|
||||
let v = value?;
|
||||
if let Some(s) = v.get("simpleText").and_then(|s| s.as_str()) {
|
||||
return Some(s.to_string());
|
||||
}
|
||||
if let Some(arr) = v.get("runs").and_then(|r| r.as_array()) {
|
||||
let joined: String = arr
|
||||
.iter()
|
||||
.filter_map(|r| r.get("text").and_then(|t| t.as_str()))
|
||||
.collect();
|
||||
if !joined.is_empty() {
|
||||
return Some(joined);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Converts a colon-separated duration ("M:SS", "MM:SS", "H:MM:SS",
/// "HH:MM:SS") into seconds.
///
/// Each field is trimmed and read as a base-60 digit, most significant
/// first; a field that is not an integer counts as `0`.
fn parse_duration_string(s: &str) -> i64 {
    s.split(':').fold(0_i64, |seconds, field| {
        let value = field.trim().parse::<i64>().unwrap_or(0);
        seconds * 60 + value
    })
}
|
||||
|
||||
/// Parses a view-count label such as "1,234,567 views", "42K views" or
/// "1.2M views" into a number.
///
/// Commas and non-breaking spaces are removed first. The `K`/`M`/`B`
/// multipliers are only recognised directly in front of " views"; text with
/// no recognised suffix is parsed as a plain number. Returns `-1` when the
/// numeric part cannot be parsed.
fn parse_view_count(s: &str) -> i64 {
    // Checked in order: the abbreviated forms must be tried before the bare
    // " views" suffix.
    const SUFFIXES: [(&str, f64); 4] = [
        ("K views", 1_000.0),
        ("M views", 1_000_000.0),
        ("B views", 1_000_000_000.0),
        (" views", 1.0),
    ];

    let cleaned: String = s
        .chars()
        .filter(|c| !matches!(c, ',' | '\u{00a0}'))
        .collect();
    let cleaned = cleaned.trim();

    let (digits, scale) = SUFFIXES
        .iter()
        .find_map(|&(suffix, scale)| {
            cleaned
                .strip_suffix(suffix)
                .map(|number| (number.trim(), scale))
        })
        .unwrap_or((cleaned, 1.0));

    match digits.parse::<f64>() {
        Ok(number) => (number * scale) as i64,
        Err(_) => -1,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Nests `sections` under the renderer chain `parse_search_response` walks.
    fn body_with_sections(sections: Value) -> Value {
        json!({
            "contents": {"twoColumnSearchResultsRenderer": {"primaryContents": {
                "sectionListRenderer": {"contents": sections}
            }}}
        })
    }

    /// A single section whose `itemSectionRenderer` holds exactly `item`.
    fn body_with_item(item: Value) -> Value {
        body_with_sections(json!([{"itemSectionRenderer": {"contents": [item]}}]))
    }

    #[test]
    fn duration_parsing() {
        let cases = [
            ("0:42", 42),
            ("3:14", 194),
            ("1:02:03", 3723),
            ("10:00:00", 36000),
        ];
        for (text, seconds) in cases {
            assert_eq!(parse_duration_string(text), seconds, "input {text:?}");
        }
    }

    #[test]
    fn view_count_parsing() {
        let cases = [
            ("1,234,567 views", 1_234_567),
            ("42K views", 42_000),
            ("1.5M views", 1_500_000),
            ("3B views", 3_000_000_000),
            ("1 view", -1), // not "views" plural — NPE accepts both
        ];
        for (text, count) in cases {
            assert_eq!(parse_view_count(text), count, "input {text:?}");
        }
    }

    #[test]
    fn runs_text_joins_runs() {
        let text = json!({"runs": [{"text": "Hello, "}, {"text": "world"}]});
        assert_eq!(runs_text(Some(&text)).as_deref(), Some("Hello, world"));
    }

    #[test]
    fn runs_text_handles_simple() {
        let text = json!({"simpleText": "just text"});
        assert_eq!(runs_text(Some(&text)).as_deref(), Some("just text"));
    }

    #[test]
    fn parses_one_video_renderer_in_section() {
        let body = body_with_item(json!({"videoRenderer": {
            "videoId": "n4tK7LYFxI0",
            "title": {"runs": [{"text": "Spektrem - Shine"}]},
            "ownerText": {"runs": [{"text": "NoCopyrightSounds"}]},
            "lengthText": {"simpleText": "3:54"},
            "viewCountText": {"simpleText": "42,000,000 views"},
            "publishedTimeText": {"simpleText": "8 years ago"}
        }}));

        let info = parse_search_response("Spektrem", &body);

        assert_eq!(info.videos.len(), 1);
        let video = &info.videos[0];
        assert_eq!(video.name, "Spektrem - Shine");
        assert_eq!(video.uploader_name, "NoCopyrightSounds");
        assert_eq!(video.duration_seconds, 234);
        assert_eq!(video.view_count, 42_000_000);
        assert_eq!(video.upload_date_relative, "8 years ago");
        assert_eq!(video.url, "https://www.youtube.com/watch?v=n4tK7LYFxI0");
    }

    #[test]
    fn parses_continuation_token() {
        let body = body_with_sections(json!([
            {"continuationItemRenderer": {
                "continuationEndpoint": {
                    "continuationCommand": {"token": "OPAQUE_TOKEN_XYZ"}
                }
            }}
        ]));

        let info = parse_search_response("x", &body);

        assert_eq!(info.continuation_token.as_deref(), Some("OPAQUE_TOKEN_XYZ"));
    }

    #[test]
    fn parses_corrected_query_hint() {
        let body = body_with_sections(json!([
            {"showingResultsForRenderer": {"correctedQuery": {"runs": [{"text": "spektrem"}]}}}
        ]));

        let info = parse_search_response("spektram", &body);

        assert_eq!(info.corrected_query.as_deref(), Some("spektrem"));
    }

    #[test]
    fn shelf_renderer_is_walked() {
        let body = body_with_item(json!({"shelfRenderer": {
            "content": {"verticalListRenderer": {"items": [
                {"videoRenderer": {"videoId": "AAAAAAAAAA1", "title": {"simpleText": "In shelf"}}}
            ]}}
        }}));

        let info = parse_search_response("x", &body);

        assert_eq!(info.videos.len(), 1);
        assert_eq!(info.videos[0].name, "In shelf");
    }
}
|
||||
91
src/youtube/suggestion_extractor.rs
Normal file
91
src/youtube/suggestion_extractor.rs
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
// YoutubeSuggestionExtractor — search-as-you-type autocomplete.
|
||||
// Mirrors NPE services/youtube/extractors/YoutubeSuggestionExtractor.java.
|
||||
//
|
||||
// Endpoint:
|
||||
// GET https://suggestqueries-clients6.youtube.com/complete/search
|
||||
// ?client=youtube&ds=yt&gl=<cc>&q=<query>&xhr=t
|
||||
//
|
||||
// Returns a JSON array shaped like: `[query, [[suggestion, 0], ...], {}]`.
|
||||
// The XSSI prefix `)]}'\n` may NOT be present — NPE handles both cases.
|
||||
|
||||
use serde_json::Value;
|
||||
use url::form_urlencoded;
|
||||
|
||||
use crate::downloader::request::Request;
|
||||
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
|
||||
use crate::newpipe::NewPipe;
|
||||
|
||||
pub fn suggestions(query: &str) -> Result<Vec<String>, ExtractionError> {
|
||||
let downloader = NewPipe::downloader().ok_or(ExtractionError::DownloaderMissing)?;
|
||||
let cc = NewPipe::preferred_content_country();
|
||||
|
||||
let encoded: String = form_urlencoded::Serializer::new(String::new())
|
||||
.append_pair("client", "youtube")
|
||||
.append_pair("ds", "yt")
|
||||
.append_pair("gl", cc.country_code())
|
||||
.append_pair("q", query)
|
||||
.append_pair("xhr", "t")
|
||||
.finish();
|
||||
let url =
|
||||
format!("https://suggestqueries-clients6.youtube.com/complete/search?{encoded}");
|
||||
|
||||
let req = Request::get(&url).build();
|
||||
let resp = downloader.execute(req)?;
|
||||
if resp.response_code() != 200 {
|
||||
return Err(ExtractionError::Network(NetworkError::Transport(format!(
|
||||
"suggest HTTP {}",
|
||||
resp.response_code()
|
||||
))));
|
||||
}
|
||||
let body = resp.response_body();
|
||||
let stripped = body.strip_prefix(")]}'\n").unwrap_or(body);
|
||||
let parsed: Value = serde_json::from_str(stripped)
|
||||
.map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))?;
|
||||
Ok(parse_suggestions(&parsed))
|
||||
}
|
||||
|
||||
pub fn parse_suggestions(value: &Value) -> Vec<String> {
|
||||
value
|
||||
.as_array()
|
||||
.and_then(|outer| outer.get(1))
|
||||
.and_then(|inner| inner.as_array())
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.filter_map(|entry| {
|
||||
entry.as_array().and_then(|e| e.first()).and_then(|s| s.as_str())
|
||||
})
|
||||
.map(String::from)
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn parses_typical_suggest_response() {
        let entries = json!([["spektrem", 0], ["spektrum", 0], ["spek tek", 0]]);
        let body = json!(["spek", entries, {}]);

        let parsed = parse_suggestions(&body);

        assert_eq!(parsed, ["spektrem", "spektrum", "spek tek"]);
    }

    #[test]
    fn empty_suggestions_array() {
        // A well-formed response that simply has no suggestions.
        let parsed = parse_suggestions(&json!(["q", []]));
        assert!(parsed.is_empty());
    }

    #[test]
    fn handles_malformed() {
        // An object instead of the expected outer array.
        let parsed = parse_suggestions(&json!({}));
        assert!(parsed.is_empty());
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue