Phase 6 — Search + Channel + Playlist + LinkHandler

Pulls in the read-side extractor surfaces Straw needs at app open
(search bar) + on detail screens (channel + playlist).

src/youtube/linkhandler/
  * mod.rs       — ACCEPTED_HOSTS allowlist (youtube.com /
                   youtube-nocookie.com / youtu.be / m.youtube.com /
                   music.youtube.com); 27 Invidious mirror hosts
                   intentionally dropped (SPEC §6.6).
  * stream.rs    — extract_video_id() handles /watch?v= / youtu.be/ /
                   /embed/ / /shorts/ / /v/ / /live/ / attribution_link;
                   strict 11-char [A-Za-z0-9_-] validation.
  * channel.rs   — ChannelIdentifier enum (DirectId / Handle / Custom /
                   LegacyUser). Resolution to UC… id lands in
                   youtube/channel.rs.
  * playlist.rs  — extracts ?list=<PLid> from /playlist and /watch URLs.
  * search.rs    — SearchFilter enum + params() opaque base64 strings +
                   uses_music_endpoint() routing flag.

src/youtube/search_extractor.rs
  * search(query, filter) → SearchInfo { query, corrected_query,
                                          videos, continuation_token }
  * Walks twoColumnSearchResultsRenderer → sectionListRenderer →
    itemSectionRenderer → videoRenderer (+ shelfRenderer recursion).
  * Parses YT duration strings, view-count abbreviations ('1.5M views'),
    publishedTimeText, ownerBadges verified flag, badge LIVE flag.
  * Music-search filters route to WEB_REMIX — flagged as not-yet-impl.

src/youtube/suggestion_extractor.rs
  * suggestions(query) → Vec<String> via the suggestqueries-clients6
    endpoint; handles both XSSI-prefixed and bare JSON responses.

src/youtube/channel.rs
  * resolve_handle_to_channel_id() via /youtubei/v1/navigation/resolve_url
  * channel_info(ChannelIdentifier) → ChannelInfo {
      name, description, avatars, banners, subscriber_count, verified,
      recent_videos, videos_continuation
    }
  * Parses both c4TabbedHeaderRenderer (most common) and the newer
    pageHeaderRenderer flavor.
  * subscriber_count parser handles K/M/B suffixes.

src/youtube/playlist_extractor.rs
  * playlist_info(playlist_id) → PlaylistInfo with first-page video
    list + continuation_token. Browses with browseId='VL<id>'.
  * Walks playlistMetadataRenderer + playlistSidebarRenderer + the
    playlistVideoListRenderer.contents[] for video items.

Tests: 121 lib unit tests pass (+44 since Phase 5). All previous phase
smoke tests are still green.

What's left:
* Phase 6 kiosks (Trending etc) — minor, deferred
* Phase 7 — UniFFI surface swap into Straw (Straw repo work)
* Phase 8 — delete rustypipe (Straw repo work)
This commit is contained in:
Kayos 2026-05-24 17:16:14 -07:00
parent b4286b8236
commit f79d8fb109
10 changed files with 1663 additions and 0 deletions

294
src/youtube/channel.rs Normal file
View file

@ -0,0 +1,294 @@
// YoutubeChannelExtractor + helper.resolveChannelId — fetches channel
// info via /youtubei/v1/browse. Mirrors NPE
// services/youtube/extractors/YoutubeChannelExtractor.java +
// YoutubeChannelHelper.java.
//
// Handle / custom URL / legacy user resolution: NPE issues
// `/youtubei/v1/navigation/resolve_url` against the `youtube.com/@handle`
// URL, walks `endpoint.browseEndpoint.browseId` to get the UC... id, and
// retries the browse call, following up to 3 redirect hops. This port
// currently issues a single resolve_url call and does not follow redirects.
//
// Tab parsing (videos/shorts/live/playlists) is in audit Track D §5 —
// `tabs[].tabRenderer.endpoint.browseEndpoint.params` is the magic
// base64 needed to land on each tab.
use serde_json::Value;
use crate::downloader::request::Request;
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
use crate::image::{Image, ImageSet, ResolutionLevel};
use crate::newpipe::NewPipe;
use crate::stream::StreamInfoItem;
use crate::youtube::client_request::build_desktop_envelope;
use crate::youtube::constants::*;
use crate::youtube::linkhandler::channel::ChannelIdentifier;
use crate::youtube::parsing::{web_client_version, youtube_post_headers};
/// Channel metadata plus the videos found on the selected tab of a
/// `/youtubei/v1/browse` response. Fields the response does not provide
/// keep their `Default` value.
#[derive(Clone, Debug, Default)]
pub struct ChannelInfo {
/// Channel ID the browse request was issued for.
pub channel_id: String,
/// Canonical `https://www.youtube.com/channel/<id>` URL.
pub url: String,
/// Display name taken from the header renderer.
pub name: String,
/// Description from `metadata.channelMetadataRenderer.description`.
pub description: String,
/// Avatar thumbnails from the header's `avatar.thumbnails[]`.
pub avatars: ImageSet,
/// Banner thumbnails from the header's `banner.thumbnails[]`.
pub banners: ImageSet,
/// Parsed subscriber count; -1 when the text is present but cannot be
/// parsed, 0 (the default) when the header carries no subscriber text.
pub subscriber_count: i64,
/// True when any header badge style starts with `BADGE_STYLE_TYPE_VERIFIED`.
pub verified: bool,
/// Videos found in the selected tab's `richGridRenderer`.
pub recent_videos: Vec<StreamInfoItem>,
/// Continuation token for fetching further videos, when available.
pub videos_continuation: Option<String>,
}
/// Fetches channel info for any supported identifier form.
///
/// A `DirectId` is browsed as-is; handles, custom URLs and legacy usernames
/// are first translated to a channel ID through
/// `resolve_handle_to_channel_id`, then browsed.
///
/// # Errors
/// Propagates any error from the resolve or browse call.
pub fn channel_info(identifier: ChannelIdentifier) -> Result<ChannelInfo, ExtractionError> {
    // Build the path fragment that resolve_url needs; direct IDs skip the
    // resolution round-trip entirely.
    let fragment = match identifier {
        ChannelIdentifier::DirectId(id) => return fetch_channel_browse(&id),
        ChannelIdentifier::Handle(handle) => format!("@{handle}"),
        ChannelIdentifier::Custom(custom) => format!("c/{custom}"),
        ChannelIdentifier::LegacyUser(user) => format!("user/{user}"),
    };
    let channel_id = resolve_handle_to_channel_id(&fragment)?;
    fetch_channel_browse(&channel_id)
}
/// Resolves a YouTube path fragment (`@handle`, `c/<name>`, `user/<name>`)
/// to a channel ID via `/youtubei/v1/navigation/resolve_url`.
///
/// # Errors
/// * `DownloaderMissing` when no downloader is registered.
/// * `Network(Transport)` when the endpoint answers with a non-200 status.
/// * `Parsing` when the body is not JSON or lacks
///   `endpoint.browseEndpoint.browseId`.
pub fn resolve_handle_to_channel_id(url_fragment: &str) -> Result<String, ExtractionError> {
    let Some(downloader) = NewPipe::downloader() else {
        return Err(ExtractionError::DownloaderMissing);
    };
    let localization = NewPipe::preferred_localization();
    let content_country = NewPipe::preferred_content_country();

    // Desktop client envelope plus the URL we want resolved.
    let mut payload =
        build_desktop_envelope(&localization, &content_country, &web_client_version());
    if let Some(fields) = payload.as_object_mut() {
        fields.insert(
            "url".into(),
            Value::String(format!("https://www.youtube.com/{url_fragment}")),
        );
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}navigation/resolve_url{DISABLE_PRETTY_PRINT_PARAM}");
    let raw_body = serde_json::to_vec(&payload).map_err(|err| {
        ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {err}")))
    })?;

    let mut request = Request::post(&endpoint, raw_body);
    for (name, value) in youtube_post_headers() {
        request = request.add_header(&name, &value);
    }

    let response = downloader.execute(request.build())?;
    let status = response.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "resolve_url HTTP {status}"
        ))));
    }

    let json = serde_json::from_str::<Value>(response.response_body())
        .map_err(|err| ExtractionError::Parsing(ParsingError::JsonShape(err.to_string())))?;

    match json
        .pointer("/endpoint/browseEndpoint/browseId")
        .and_then(Value::as_str)
    {
        Some(browse_id) => Ok(browse_id.to_owned()),
        None => Err(ExtractionError::Parsing(ParsingError::MissingField(
            "endpoint.browseEndpoint.browseId".into(),
        ))),
    }
}
/// POSTs a `/youtubei/v1/browse` request for `channel_id` and parses the
/// response with `parse_channel_browse`.
///
/// # Errors
/// * `DownloaderMissing` when no downloader is registered.
/// * `Network(Transport)` when the endpoint answers with a non-200 status.
/// * `Parsing` when the request cannot be serialised or the response is not
///   valid JSON.
pub fn fetch_channel_browse(channel_id: &str) -> Result<ChannelInfo, ExtractionError> {
    let Some(downloader) = NewPipe::downloader() else {
        return Err(ExtractionError::DownloaderMissing);
    };
    let localization = NewPipe::preferred_localization();
    let content_country = NewPipe::preferred_content_country();

    // Desktop client envelope plus the channel to browse.
    let mut payload =
        build_desktop_envelope(&localization, &content_country, &web_client_version());
    if let Some(fields) = payload.as_object_mut() {
        fields.insert("browseId".into(), Value::String(channel_id.into()));
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}browse{DISABLE_PRETTY_PRINT_PARAM}");
    let raw_body = serde_json::to_vec(&payload).map_err(|err| {
        ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {err}")))
    })?;

    // Attach the standard YouTube POST headers to the request builder.
    let request = youtube_post_headers()
        .into_iter()
        .fold(Request::post(&endpoint, raw_body), |req, (name, value)| {
            req.add_header(&name, &value)
        });

    let response = downloader.execute(request.build())?;
    let status = response.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "browse HTTP {status}"
        ))));
    }

    let json = serde_json::from_str::<Value>(response.response_body())
        .map_err(|err| ExtractionError::Parsing(ParsingError::JsonShape(err.to_string())))?;
    Ok(parse_channel_browse(channel_id, &json))
}
pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
let mut info = ChannelInfo {
channel_id: channel_id.to_string(),
url: format!("https://www.youtube.com/channel/{channel_id}"),
..ChannelInfo::default()
};
// C4_TABBED header flavor is the most common.
if let Some(header) = body
.get("header")
.and_then(|h| h.get("c4TabbedHeaderRenderer"))
{
if let Some(s) = header.get("title").and_then(|t| t.as_str()) {
info.name = s.to_string();
}
info.avatars = parse_image_set(header.get("avatar"));
info.banners = parse_image_set(header.get("banner"));
if let Some(text) = header
.get("subscriberCountText")
.and_then(|s| s.get("simpleText"))
.and_then(|s| s.as_str())
{
info.subscriber_count = parse_subscriber_count(text);
}
if let Some(badges) = header.get("badges").and_then(|b| b.as_array()) {
info.verified = badges.iter().any(|b| {
b.get("metadataBadgeRenderer")
.and_then(|m| m.get("style"))
.and_then(|s| s.as_str())
.map(|s| s.starts_with("BADGE_STYLE_TYPE_VERIFIED"))
.unwrap_or(false)
});
}
}
// Alternative pageHeaderRenderer (newer flavor — 2025+)
else if let Some(header) = body
.get("header")
.and_then(|h| h.get("pageHeaderRenderer"))
{
if let Some(s) = header.get("pageTitle").and_then(|t| t.as_str()) {
info.name = s.to_string();
}
}
// microformat / description
if let Some(desc) = body
.get("metadata")
.and_then(|m| m.get("channelMetadataRenderer"))
.and_then(|m| m.get("description"))
.and_then(|d| d.as_str())
{
info.description = desc.to_string();
}
// First tab's video grid — recent videos.
if let Some(tabs) = body
.get("contents")
.and_then(|c| c.get("twoColumnBrowseResultsRenderer"))
.and_then(|c| c.get("tabs"))
.and_then(|t| t.as_array())
{
for tab in tabs {
let Some(tr) = tab.get("tabRenderer") else { continue };
if !tr
.get("selected")
.and_then(|s| s.as_bool())
.unwrap_or(false)
{
continue;
}
if let Some(items) = tr
.get("content")
.and_then(|c| c.get("richGridRenderer"))
.and_then(|g| g.get("contents"))
.and_then(|c| c.as_array())
{
for cell in items {
if let Some(item) = cell
.get("richItemRenderer")
.and_then(|r| r.get("content"))
.and_then(|c| c.get("videoRenderer"))
{
if let Some(s) =
crate::youtube::search_extractor::test_helpers::video_renderer_to_item(item)
{
info.recent_videos.push(s);
}
}
}
}
}
}
info
}
fn parse_image_set(value: Option<&Value>) -> ImageSet {
let mut out = Vec::new();
if let Some(arr) = value
.and_then(|v| v.get("thumbnails"))
.and_then(|t| t.as_array())
{
for t in arr {
if let Some(url) = t.get("url").and_then(|v| v.as_str()) {
let h = t.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
let w = t.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
}
}
}
out
}
/// Parses a subscriber-count label such as `"12.5M subscribers"`,
/// `"1.2K subscribers"` or `"350 subscribers"` into an absolute number.
///
/// The words "subscriber(s)" and thousands separators are removed, then an
/// optional `K` / `M` / `B` suffix scales the remaining number.
///
/// Returns -1 when the text does not contain a parseable, finite,
/// non-negative number.
fn parse_subscriber_count(text: &str) -> i64 {
    const SUFFIXES: [(char, f64); 3] = [
        ('K', 1_000.0),
        ('M', 1_000_000.0),
        ('B', 1_000_000_000.0),
    ];

    let stripped = text.replace("subscribers", "").replace("subscriber", "");
    let stripped = stripped.trim();

    let (digits, multiplier) = SUFFIXES
        .iter()
        .find_map(|&(suffix, mult)| {
            stripped
                .strip_suffix(suffix)
                .map(|rest| (rest.trim(), mult))
        })
        .unwrap_or((stripped, 1.0));

    digits
        .replace(',', "")
        .parse::<f64>()
        .ok()
        // "inf" / "nan" parse successfully as f64; treat them (and negative
        // values) as unparseable rather than saturating the cast.
        .filter(|n| n.is_finite() && *n >= 0.0)
        // Round instead of truncating: decimal fractions are not exact in
        // binary, so e.g. 1.005 * 1000 evaluates to 1004.9999999999999.
        .map(|n| (n * multiplier).round() as i64)
        .unwrap_or(-1)
}
// Unit tests for the channel browse parser and the subscriber-count helper.
// All fixtures are inline JSON; nothing here touches the network.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Plain numbers, each K/M/B suffix, and the singular "subscriber" label.
#[test]
fn subscriber_count_parsing() {
assert_eq!(parse_subscriber_count("350 subscribers"), 350);
assert_eq!(parse_subscriber_count("1.2K subscribers"), 1_200);
assert_eq!(parse_subscriber_count("12.5M subscribers"), 12_500_000);
assert_eq!(parse_subscriber_count("2B subscribers"), 2_000_000_000);
assert_eq!(parse_subscriber_count("1 subscriber"), 1);
}
// c4TabbedHeaderRenderer: title, subscriber text, verified badge (prefix
// match on the style) and the metadata description.
#[test]
fn parses_c4_tabbed_header() {
let body = json!({
"header":{"c4TabbedHeaderRenderer":{
"title":"NoCopyrightSounds",
"subscriberCountText":{"simpleText":"42.5M subscribers"},
"badges":[{"metadataBadgeRenderer":{"style":"BADGE_STYLE_TYPE_VERIFIED_ARTIST"}}]
}},
"metadata":{"channelMetadataRenderer":{"description":"Royalty-free music"}}
});
let info = parse_channel_browse("UC_aEa8K-EOJ3D6gOs7HcyNg", &body);
assert_eq!(info.name, "NoCopyrightSounds");
assert_eq!(info.description, "Royalty-free music");
assert_eq!(info.subscriber_count, 42_500_000);
assert!(info.verified);
assert_eq!(info.channel_id, "UC_aEa8K-EOJ3D6gOs7HcyNg");
}
// pageHeaderRenderer is only consulted when the c4 header is absent.
#[test]
fn parses_page_header_renderer_fallback() {
let body = json!({
"header":{"pageHeaderRenderer":{"pageTitle":"@SomeChannel"}}
});
let info = parse_channel_browse("UCxxx", &body);
assert_eq!(info.name, "@SomeChannel");
}
}

View file

@ -0,0 +1,112 @@
// YoutubeChannelLinkHandlerFactory — accepts:
// * https://www.youtube.com/channel/<channelId> (UC...)
// * https://www.youtube.com/@<handle> (handle resolution → channelId)
// * https://www.youtube.com/c/<custom-url> (legacy custom URLs)
// * https://www.youtube.com/user/<username> (legacy)
//
// Handles + custom URLs need a live resolve via /youtubei/v1/navigation/resolve_url.
// That call lives in youtube/channel.rs (resolve_handle_to_channel_id); here
// we just classify the raw URL fragment.
use url::Url;
use crate::youtube::linkhandler::{host_is_youtube, LinkError};
/// How a channel URL identifies its channel. Each variant carries the first
/// path segment after the recognised prefix, exactly as it appeared in the
/// URL; no validation of the segment's shape is performed here.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ChannelIdentifier {
/// Already a UC... channel ID — no resolution needed.
DirectId(String),
/// `@handle` — needs resolve_url to translate to a channel ID.
/// The stored string does not include the leading `@`.
Handle(String),
/// `c/<name>` — legacy custom URL; needs resolve_url.
Custom(String),
/// `user/<name>` — legacy username; needs resolve_url.
LegacyUser(String),
}
/// Classifies a YouTube channel URL into a [`ChannelIdentifier`].
///
/// Recognised path prefixes are `/channel/`, `/c/`, `/user/` and `/@`; only
/// the first path segment after the prefix is kept.
///
/// # Errors
/// * `InvalidUrl` — the input is not a parseable URL or has no host.
/// * `UnsupportedHost` — the host is not an accepted YouTube host.
/// * `MissingId` — no recognised prefix, or the segment after it is empty.
pub fn parse(url_str: &str) -> Result<ChannelIdentifier, LinkError> {
    let parsed = match Url::parse(url_str) {
        Ok(parsed) => parsed,
        Err(e) => return Err(LinkError::InvalidUrl(format!("{url_str}: {e}"))),
    };
    let Some(host) = parsed.host_str() else {
        return Err(LinkError::InvalidUrl("no host".into()));
    };
    if !host_is_youtube(host) {
        return Err(LinkError::UnsupportedHost(host.into()));
    }

    // First path segment of `rest`, or MissingId when that segment is empty.
    let leading_segment = |rest: &str| -> Result<String, LinkError> {
        match rest.split('/').next() {
            Some(segment) if !segment.is_empty() => Ok(segment.to_owned()),
            _ => Err(LinkError::MissingId(url_str.into())),
        }
    };

    let path = parsed.path().trim_end_matches('/');
    if let Some(rest) = path.strip_prefix("/channel/") {
        leading_segment(rest).map(ChannelIdentifier::DirectId)
    } else if let Some(rest) = path.strip_prefix("/c/") {
        leading_segment(rest).map(ChannelIdentifier::Custom)
    } else if let Some(rest) = path.strip_prefix("/user/") {
        leading_segment(rest).map(ChannelIdentifier::LegacyUser)
    } else if let Some(rest) = path.strip_prefix("/@") {
        leading_segment(rest).map(ChannelIdentifier::Handle)
    } else {
        Err(LinkError::MissingId(url_str.into()))
    }
}
/// Builds the canonical `https://www.youtube.com/channel/<id>` URL.
/// The ID is appended verbatim, without validation or escaping.
pub fn channel_url(channel_id: &str) -> String {
    const BASE: &str = "https://www.youtube.com/channel/";
    let mut url = String::with_capacity(BASE.len() + channel_id.len());
    url.push_str(BASE);
    url.push_str(channel_id);
    url
}
// Unit tests for channel URL classification and the channel URL builder.
#[cfg(test)]
mod tests {
use super::*;
// /channel/<id> is returned as-is, no resolution required.
#[test]
fn direct_channel_id() {
let p = parse("https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg").unwrap();
assert_eq!(p, ChannelIdentifier::DirectId("UC_aEa8K-EOJ3D6gOs7HcyNg".into()));
}
// /@<handle> — the leading '@' is stripped from the stored handle.
#[test]
fn handle_form() {
let p = parse("https://www.youtube.com/@NoCopyrightSounds").unwrap();
assert_eq!(p, ChannelIdentifier::Handle("NoCopyrightSounds".into()));
}
// /c/<name> legacy custom URL.
#[test]
fn legacy_custom_url() {
let p = parse("https://www.youtube.com/c/NoCopyrightSounds").unwrap();
assert_eq!(p, ChannelIdentifier::Custom("NoCopyrightSounds".into()));
}
// /user/<name> legacy username.
#[test]
fn legacy_user() {
let p = parse("https://www.youtube.com/user/SomeOldChannel").unwrap();
assert_eq!(p, ChannelIdentifier::LegacyUser("SomeOldChannel".into()));
}
// Hosts outside the first-party allowlist are rejected.
#[test]
fn rejects_non_youtube() {
assert!(parse("https://piped.video/channel/UCxxx").is_err());
}
#[test]
fn channel_url_builder() {
assert_eq!(
channel_url("UC_aEa8K-EOJ3D6gOs7HcyNg"),
"https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg"
);
}
}

View file

@ -0,0 +1,70 @@
// LinkHandler factories — URL parsing + URL building for YouTube
// resource categories. Mirrors NPE
// services/youtube/linkHandler/Youtube*LinkHandlerFactory.java.
//
// PORT SCOPE (per SPEC §6.6): we keep youtube.com / youtube-nocookie.com
// / youtu.be / m.youtube.com / music.youtube.com. The 27-host Invidious
// mirror list in NPE is dropped — Sulkta isn't an Invidious mirror.
pub mod channel;
pub mod playlist;
pub mod search;
pub mod stream;
use thiserror::Error;
/// Errors produced while parsing YouTube URLs in the link handlers.
/// Each variant carries a string that is interpolated into its message.
#[derive(Debug, Error)]
pub enum LinkError {
/// The input could not be parsed as a URL, or the URL has no host.
#[error("invalid url: {0}")]
InvalidUrl(String),
/// The URL's host is not accepted by `host_is_youtube`; carries the host.
#[error("unsupported host: {0}")]
UnsupportedHost(String),
/// The URL shape was not recognised or the ID part is empty; carries the URL.
#[error("missing id in url: {0}")]
MissingId(String),
/// An ID was found but failed shape validation; carries the rejected ID.
#[error("malformed id: {0}")]
MalformedId(String),
}
/// Hosts accepted as first-party YouTube link origins. Audit Track D §6.
pub const ACCEPTED_HOSTS: &[&str] = &[
    "youtube.com",
    "www.youtube.com",
    "m.youtube.com",
    "music.youtube.com",
    "youtu.be",
    "www.youtube-nocookie.com",
];

/// Returns true when `host` is one of [`ACCEPTED_HOSTS`].
///
/// The comparison is ASCII case-insensitive and ignores a single leading
/// `www.` on both the candidate and the allowlist entries, so
/// `youtube-nocookie.com` matches the `www.youtube-nocookie.com` entry.
pub fn host_is_youtube(host: &str) -> bool {
    /// Drops one leading "www." label, if present.
    fn without_www(name: &str) -> &str {
        name.strip_prefix("www.").unwrap_or(name)
    }

    let lowered = host.to_ascii_lowercase();
    let needle = without_www(&lowered);
    for entry in ACCEPTED_HOSTS {
        if without_www(entry) == needle {
            return true;
        }
    }
    false
}
// Unit tests for the first-party host allowlist.
#[cfg(test)]
mod tests {
use super::*;
// Every allowlisted host form, with and without "www.", in any ASCII case.
#[test]
fn accepts_first_party_hosts() {
assert!(host_is_youtube("www.youtube.com"));
assert!(host_is_youtube("youtube.com"));
assert!(host_is_youtube("m.youtube.com"));
assert!(host_is_youtube("music.youtube.com"));
assert!(host_is_youtube("youtu.be"));
assert!(host_is_youtube("WWW.YouTube.COM")); // case-insensitive
}
// Third-party front-ends and unrelated hosts must not pass the allowlist.
#[test]
fn rejects_invidious_and_random() {
assert!(!host_is_youtube("invidious.io"));
assert!(!host_is_youtube("yewtu.be"));
assert!(!host_is_youtube("piped.video"));
assert!(!host_is_youtube("evil.com"));
}
}

View file

@ -0,0 +1,81 @@
// YoutubePlaylistLinkHandlerFactory — accepts:
// * https://www.youtube.com/playlist?list=<PLid>
// * https://www.youtube.com/watch?v=...&list=<PLid>
// * https://music.youtube.com/playlist?list=<PLid>
//
// YT playlist IDs prefix:
// * PL user-curated playlists
// * RD mix / radio
// * OLAK5uy_ album / single
// * LL liked-videos (private — won't extract anonymously)
// * WL watch-later (private)
// * UU uploads (auto-generated per channel)
use url::Url;
use crate::youtube::linkhandler::{host_is_youtube, LinkError};
/// Extracts the value of the `list` query parameter from a YouTube URL.
///
/// Only the first `list` pair is considered; the value is returned as-is
/// without checking its prefix or length.
///
/// # Errors
/// * `InvalidUrl` — the input is not a parseable URL or has no host.
/// * `UnsupportedHost` — the host is not an accepted YouTube host.
/// * `MissingId` — there is no `list` parameter, or its value is empty.
pub fn extract_playlist_id(url_str: &str) -> Result<String, LinkError> {
    let parsed = match Url::parse(url_str) {
        Ok(parsed) => parsed,
        Err(e) => return Err(LinkError::InvalidUrl(format!("{url_str}: {e}"))),
    };
    let Some(host) = parsed.host_str() else {
        return Err(LinkError::InvalidUrl("no host".into()));
    };
    if !host_is_youtube(host) {
        return Err(LinkError::UnsupportedHost(host.into()));
    }

    for (key, value) in parsed.query_pairs() {
        if key == "list" {
            // The first `list` pair decides the outcome, even when empty.
            return if value.is_empty() {
                Err(LinkError::MissingId(url_str.into()))
            } else {
                Ok(value.into_owned())
            };
        }
    }
    Err(LinkError::MissingId(url_str.into()))
}
/// Builds the canonical `https://www.youtube.com/playlist?list=<id>` URL.
/// The ID is appended verbatim, without validation or escaping.
pub fn playlist_url(playlist_id: &str) -> String {
    ["https://www.youtube.com/playlist?list=", playlist_id].concat()
}
// Unit tests for playlist ID extraction across the accepted URL shapes.
#[cfg(test)]
mod tests {
use super::*;
// /playlist?list=<id>
#[test]
fn standalone_playlist() {
let id = extract_playlist_id(
"https://www.youtube.com/playlist?list=PLMC9KNkIncKtPzgY-5rmhvj7fax8fdxoj",
)
.unwrap();
assert_eq!(id, "PLMC9KNkIncKtPzgY-5rmhvj7fax8fdxoj");
}
// /watch?v=...&list=<id> — the playlist ID is read regardless of `v`.
#[test]
fn watch_with_list() {
let id = extract_playlist_id(
"https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLxxx",
)
.unwrap();
assert_eq!(id, "PLxxx");
}
// music.youtube.com is an accepted host.
#[test]
fn music_subdomain() {
let id = extract_playlist_id(
"https://music.youtube.com/playlist?list=OLAK5uy_kFooBar",
)
.unwrap();
assert_eq!(id, "OLAK5uy_kFooBar");
}
// No `list` parameter at all yields MissingId.
#[test]
fn rejects_no_list_param() {
let err = extract_playlist_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
.unwrap_err();
assert!(matches!(err, LinkError::MissingId(_)));
}
// The host check runs before the query is inspected.
#[test]
fn rejects_non_youtube_host() {
let err = extract_playlist_id("https://invidious.io/playlist?list=PLxxx").unwrap_err();
assert!(matches!(err, LinkError::UnsupportedHost(_)));
}
}

View file

@ -0,0 +1,97 @@
// YoutubeSearchQueryHandlerFactory + search filters. Mirrors NPE
// YoutubeSearchQueryHandlerFactory.java + the filter params in
// YoutubeSearchExtractor.java.
//
// Filter params are opaque base64 protobufs — NPE doesn't decode them,
// just sends the magic strings. We mirror that. See audit Track D §3.
use url::form_urlencoded;
/// Result-type filter for a search request. Each variant other than `All`
/// maps to a fixed, opaque `params` string that is sent unmodified.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum SearchFilter {
    /// All result types — no params field sent.
    All,
    /// Videos only.
    Videos,
    /// Channels only.
    Channels,
    /// Playlists only.
    Playlists,
    /// "Music songs" filter — uses the WEB_REMIX path on music.youtube.com.
    MusicSongs,
    /// "Music videos" filter — also WEB_REMIX.
    MusicVideos,
    /// "Music albums" filter.
    MusicAlbums,
    /// "Music playlists" filter.
    MusicPlaylists,
    /// "Music artists" filter.
    MusicArtists,
}

impl SearchFilter {
    /// Returns the opaque `params` string for this filter, or `None` for
    /// `All`, in which case the field must be omitted from the request.
    pub fn params(&self) -> Option<&'static str> {
        let encoded = match *self {
            SearchFilter::All => return None,
            SearchFilter::Videos => "EgIQAfABAQ%3D%3D",
            SearchFilter::Channels => "EgIQAvABAQ%3D%3D",
            SearchFilter::Playlists => "EgIQA_ABAQ%3D%3D",
            SearchFilter::MusicSongs => "EgWKAQIIAWoMEA4QChADEAQQCRAF",
            SearchFilter::MusicVideos => "EgWKAQIQAWoMEA4QChADEAQQCRAF",
            SearchFilter::MusicAlbums => "EgWKAQIYAWoMEA4QChADEAQQCRAF",
            SearchFilter::MusicPlaylists => "EgeKAQQoAEABagwQDhAKEAMQBBAJEAU%3D",
            SearchFilter::MusicArtists => "EgWKAQIgAWoMEA4QChADEAQQCRAF",
        };
        Some(encoded)
    }

    /// True for the five `Music*` filters, which must be routed to the music
    /// endpoint instead of the regular search endpoint.
    pub fn uses_music_endpoint(&self) -> bool {
        match *self {
            SearchFilter::All
            | SearchFilter::Videos
            | SearchFilter::Channels
            | SearchFilter::Playlists => false,
            SearchFilter::MusicSongs
            | SearchFilter::MusicVideos
            | SearchFilter::MusicAlbums
            | SearchFilter::MusicPlaylists
            | SearchFilter::MusicArtists => true,
        }
    }
}
/// Builds the `https://www.youtube.com/results?search_query=…` URL for
/// `query`, form-urlencoding the query text.
pub fn search_url(query: &str) -> String {
    let mut serializer = form_urlencoded::Serializer::new(String::new());
    serializer.append_pair("search_query", query);
    let encoded: String = serializer.finish();
    ["https://www.youtube.com/results?", encoded.as_str()].concat()
}
// Unit tests for search filter params, endpoint routing and URL building.
#[cfg(test)]
mod tests {
use super::*;
// `All` must not send a params field at all.
#[test]
fn all_filter_omits_params() {
assert!(SearchFilter::All.params().is_none());
}
// The three regular typed filters each carry a params string.
#[test]
fn typed_filters_have_params() {
assert!(SearchFilter::Videos.params().is_some());
assert!(SearchFilter::Channels.params().is_some());
assert!(SearchFilter::Playlists.params().is_some());
}
// Music filters are flagged for the music endpoint; regular ones are not.
#[test]
fn music_filters_route_to_music_endpoint() {
assert!(SearchFilter::MusicSongs.uses_music_endpoint());
assert!(!SearchFilter::Videos.uses_music_endpoint());
}
// Spaces become '+', and a literal '+' is percent-encoded.
#[test]
fn search_url_encodes_query() {
assert_eq!(
search_url("rust + ferris"),
"https://www.youtube.com/results?search_query=rust+%2B+ferris"
);
}
}

View file

@ -0,0 +1,168 @@
// YoutubeStreamLinkHandlerFactory — accepts:
// * https://www.youtube.com/watch?v=<11-char-id>
// * https://m.youtube.com/watch?v=...
// * https://music.youtube.com/watch?v=...
// * https://youtu.be/<id>
// * https://www.youtube.com/embed/<id>
// * https://www.youtube.com/shorts/<id>
// * https://www.youtube.com/v/<id> (legacy)
// * https://www.youtube-nocookie.com/embed/<id>
// * attribution_link?u=<encoded-watch-url>
//
// Plus any of the above with `&t=<seconds>` for timestamp.
use once_cell::sync::Lazy;
use regex::Regex;
use url::Url;
use crate::youtube::linkhandler::{host_is_youtube, LinkError};
/// Exact byte length of a YouTube video ID.
const VIDEO_ID_LEN: usize = 11;
/// Matches exactly 11 characters from `[A-Za-z0-9_-]`; compiled once on
/// first use. The `unwrap` cannot fail because the pattern is a literal.
static VIDEO_ID_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap());
/// Returns true when `id` is exactly 11 bytes long and consists only of
/// characters from `[A-Za-z0-9_-]`.
pub fn is_valid_video_id(id: &str) -> bool {
    // Cheap length check first so the regex only runs on plausible inputs.
    if id.len() != VIDEO_ID_LEN {
        return false;
    }
    VIDEO_ID_RE.is_match(id)
}
/// Extracts the 11-char video ID from a YouTube URL.
///
/// Supported shapes: `/watch?v=<id>`, `youtu.be/<id>` (also
/// `youtu.be/watch?v=<id>`), `/embed/<id>`, `/shorts/<id>`, `/v/<id>`,
/// `/live/<id>` and `/attribution_link?u=<encoded watch url>`.
///
/// # Errors
/// * `InvalidUrl` — the input is not a parseable URL or has no host.
/// * `UnsupportedHost` — the host is not an accepted YouTube host.
/// * `MissingId` — the URL is not a video URL (search results, channel
///   pages, …), so no candidate ID could be located.
/// * `MalformedId` — a candidate was found but is not a valid 11-char ID.
pub fn extract_video_id(input_url: &str) -> Result<String, LinkError> {
    extract_video_id_inner(input_url, true)
}

/// Implementation of [`extract_video_id`]. `follow_attribution` is false
/// while processing the target of an `attribution_link`, which limits that
/// indirection to a single level.
fn extract_video_id_inner(
    input_url: &str,
    follow_attribution: bool,
) -> Result<String, LinkError> {
    /// First path segment of `rest` (may be empty).
    fn first_segment(rest: &str) -> String {
        rest.split('/').next().unwrap_or("").to_string()
    }

    let url = Url::parse(input_url)
        .map_err(|e| LinkError::InvalidUrl(format!("{input_url}: {e}")))?;
    let host = url
        .host_str()
        .ok_or_else(|| LinkError::InvalidUrl("no host".into()))?;
    if !host_is_youtube(host) {
        return Err(LinkError::UnsupportedHost(host.into()));
    }
    let path = url.path();
    // Value of the first query pair named `key`, if any.
    let query_value = |key: &str| {
        url.query_pairs()
            .find(|(k, _)| k == key)
            .map(|(_, v)| v.into_owned())
    };

    let mut candidate: Option<String> = None;

    // youtu.be/<id> — but youtu.be/watch?v=<id> also exists, so an explicit
    // `v` parameter wins over the path segment (which would be "watch").
    if host.to_ascii_lowercase().ends_with("youtu.be") {
        candidate = query_value("v").or_else(|| path.strip_prefix('/').map(first_segment));
    }

    // /embed/<id>, /shorts/<id>, /v/<id>, /live/<id>
    for prefix in ["/embed/", "/shorts/", "/v/", "/live/"] {
        if let Some(rest) = path.strip_prefix(prefix) {
            candidate = Some(first_segment(rest));
            break;
        }
    }

    // /watch?v=<id>
    if candidate.is_none() && (path == "/watch" || path == "/watch/") {
        candidate = query_value("v");
    }

    // /attribution_link?u=<encoded watch url>
    if candidate.is_none() && follow_attribution && path.starts_with("/attribution_link") {
        if let Some(u_param) = query_value("u") {
            // `u` is a site-relative path such as "/watch?v=…". Resolve it
            // once; a nested attribution_link is not followed further.
            let inner = format!("https://www.youtube.com{u_param}");
            return extract_video_id_inner(&inner, false);
        }
    }

    let id = candidate.ok_or_else(|| LinkError::MissingId(input_url.into()))?;
    if !is_valid_video_id(&id) {
        return Err(LinkError::MalformedId(id));
    }
    Ok(id)
}
/// Builds the canonical `https://www.youtube.com/watch?v=<id>` URL.
/// The ID is appended verbatim, without validation or escaping.
pub fn watch_url(video_id: &str) -> String {
    let mut url = String::from("https://www.youtube.com/watch?v=");
    url.push_str(video_id);
    url
}
// Unit tests for video ID extraction across the accepted URL shapes, plus
// host and ID-shape rejection.
#[cfg(test)]
mod tests {
use super::*;
// /watch?v=<id>
#[test]
fn watch_full_url() {
let id = extract_video_id("https://www.youtube.com/watch?v=n4tK7LYFxI0").unwrap();
assert_eq!(id, "n4tK7LYFxI0");
}
// Extra query parameters (timestamp, playlist) do not affect the ID.
#[test]
fn watch_with_extra_params() {
let id = extract_video_id(
"https://www.youtube.com/watch?v=n4tK7LYFxI0&t=42s&list=foo",
)
.unwrap();
assert_eq!(id, "n4tK7LYFxI0");
}
// youtu.be/<id>
#[test]
fn youtu_be_short() {
let id = extract_video_id("https://youtu.be/dQw4w9WgXcQ").unwrap();
assert_eq!(id, "dQw4w9WgXcQ");
}
// youtu.be/<id>?t=… — the query string is not part of the ID.
#[test]
fn youtu_be_short_with_query() {
let id = extract_video_id("https://youtu.be/dQw4w9WgXcQ?t=10").unwrap();
assert_eq!(id, "dQw4w9WgXcQ");
}
// /embed/<id> on both youtube.com and youtube-nocookie.com.
#[test]
fn embed_form() {
let id = extract_video_id("https://www.youtube.com/embed/n4tK7LYFxI0").unwrap();
assert_eq!(id, "n4tK7LYFxI0");
let id = extract_video_id("https://www.youtube-nocookie.com/embed/n4tK7LYFxI0").unwrap();
assert_eq!(id, "n4tK7LYFxI0");
}
// /shorts/<id>
#[test]
fn shorts_form() {
let id = extract_video_id("https://www.youtube.com/shorts/n4tK7LYFxI0").unwrap();
assert_eq!(id, "n4tK7LYFxI0");
}
// music.youtube.com is an accepted host.
#[test]
fn music_youtube() {
let id =
extract_video_id("https://music.youtube.com/watch?v=n4tK7LYFxI0").unwrap();
assert_eq!(id, "n4tK7LYFxI0");
}
// Third-party front-ends are rejected before the path is inspected.
#[test]
fn rejects_invidious_host() {
let err = extract_video_id("https://yewtu.be/watch?v=n4tK7LYFxI0").unwrap_err();
assert!(matches!(err, LinkError::UnsupportedHost(_)));
}
// An ID that is not exactly 11 valid characters is MalformedId.
#[test]
fn rejects_invalid_id_shape() {
let err = extract_video_id("https://www.youtube.com/watch?v=tooshort").unwrap_err();
assert!(matches!(err, LinkError::MalformedId(_)));
}
// Trailing path segments after the ID are ignored.
#[test]
fn accepts_only_first_path_segment() {
let id = extract_video_id("https://youtu.be/n4tK7LYFxI0/extra").unwrap();
assert_eq!(id, "n4tK7LYFxI0");
}
#[test]
fn watch_url_builder() {
assert_eq!(
watch_url("n4tK7LYFxI0"),
"https://www.youtube.com/watch?v=n4tK7LYFxI0"
);
}
}

View file

@ -3,12 +3,17 @@
// itag table. Phase 4+ will add the stream extractor, search, channel,
// playlist, kiosks.
pub mod channel;
pub mod client_request;
pub mod constants;
pub mod itag;
pub mod js;
pub mod linkhandler;
pub mod parsing;
pub mod playlist_extractor;
pub mod potoken;
pub mod search_extractor;
pub mod stream_extractor;
pub mod stream_helper;
pub mod suggestion_extractor;

View file

@ -0,0 +1,297 @@
// YoutubePlaylistExtractor — mirrors NPE
// services/youtube/extractors/YoutubePlaylistExtractor.java.
//
// 2-POST pattern (audit Track D §7):
// 1. browseId="VL<playlistId>" → playlist metadata + first batch
// 2. continuation token → subsequent batches
//
// Body shape per call: build_desktop_envelope + add browseId (or
// continuation). Response walked to playlistVideoListRenderer.contents[]
// .playlistVideoRenderer.
use serde_json::Value;
use crate::downloader::request::Request;
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
use crate::image::ImageSet;
use crate::newpipe::NewPipe;
use crate::stream::StreamInfoItem;
use crate::youtube::client_request::build_desktop_envelope;
use crate::youtube::constants::*;
use crate::youtube::parsing::{web_client_version, youtube_post_headers};
/// Playlist metadata plus the first page of videos from a
/// `/youtubei/v1/browse` response. Fields the response does not provide keep
/// their `Default` value.
#[derive(Clone, Debug, Default)]
pub struct PlaylistInfo {
/// Playlist ID as passed by the caller (without the `VL` browse prefix).
pub playlist_id: String,
/// Canonical `https://www.youtube.com/playlist?list=<id>` URL.
pub url: String,
/// Title from the metadata renderer, falling back to the sidebar title.
pub name: String,
/// Description from `metadata.playlistMetadataRenderer.description`.
pub description: String,
/// Owner display name from the sidebar's `videoOwnerRenderer`.
pub uploader_name: String,
/// Owner channel URL built from the owner's browse ID.
pub uploader_url: String,
/// Owner browse ID from the owner's navigation endpoint.
pub uploader_id: String,
/// Playlist thumbnails, when available.
pub thumbnails: ImageSet,
/// Number of videos parsed from the sidebar stats; -1 when the stats text
/// is present but not numeric, 0 (the default) when it is absent.
pub video_count: i64,
/// Videos from the first page of `playlistVideoListRenderer.contents[]`.
pub videos: Vec<StreamInfoItem>,
/// Token for fetching the next page, taken from a `continuationItemRenderer`.
pub continuation_token: Option<String>,
}
/// Fetches the first page of a playlist by POSTing a `/youtubei/v1/browse`
/// request with `browseId = "VL<playlist_id>"`, then parses the response
/// with `parse_playlist_browse`.
///
/// # Errors
/// * `DownloaderMissing` when no downloader is registered.
/// * `Network(Transport)` when the endpoint answers with a non-200 status.
/// * `Parsing` when the request cannot be serialised or the response is not
///   valid JSON.
pub fn playlist_info(playlist_id: &str) -> Result<PlaylistInfo, ExtractionError> {
    let Some(downloader) = NewPipe::downloader() else {
        return Err(ExtractionError::DownloaderMissing);
    };
    let localization = NewPipe::preferred_localization();
    let content_country = NewPipe::preferred_content_country();

    // Desktop client envelope plus the VL-prefixed playlist browse ID.
    let mut payload =
        build_desktop_envelope(&localization, &content_country, &web_client_version());
    if let Some(fields) = payload.as_object_mut() {
        let browse_id = format!("VL{playlist_id}");
        fields.insert("browseId".into(), Value::String(browse_id));
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}browse{DISABLE_PRETTY_PRINT_PARAM}");
    let raw_body = serde_json::to_vec(&payload).map_err(|err| {
        ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {err}")))
    })?;

    let mut request = Request::post(&endpoint, raw_body);
    for (name, value) in youtube_post_headers() {
        request = request.add_header(&name, &value);
    }

    let response = downloader.execute(request.build())?;
    let status = response.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "browse HTTP {status}"
        ))));
    }

    let json = serde_json::from_str::<Value>(response.response_body())
        .map_err(|err| ExtractionError::Parsing(ParsingError::JsonShape(err.to_string())))?;
    Ok(parse_playlist_browse(playlist_id, &json))
}
pub fn parse_playlist_browse(playlist_id: &str, body: &Value) -> PlaylistInfo {
let mut info = PlaylistInfo {
playlist_id: playlist_id.into(),
url: format!("https://www.youtube.com/playlist?list={playlist_id}"),
..PlaylistInfo::default()
};
// metadata.playlistMetadataRenderer.title / description
if let Some(meta) = body
.get("metadata")
.and_then(|m| m.get("playlistMetadataRenderer"))
{
if let Some(s) = meta.get("title").and_then(|v| v.as_str()) {
info.name = s.into();
}
if let Some(s) = meta.get("description").and_then(|v| v.as_str()) {
info.description = s.into();
}
}
// sidebar.playlistSidebarRenderer.items[].playlistSidebarPrimaryInfoRenderer
// + playlistSidebarSecondaryInfoRenderer
if let Some(items) = body
.get("sidebar")
.and_then(|s| s.get("playlistSidebarRenderer"))
.and_then(|s| s.get("items"))
.and_then(|i| i.as_array())
{
for item in items {
if let Some(primary) = item.get("playlistSidebarPrimaryInfoRenderer") {
if info.name.is_empty() {
if let Some(s) = primary
.get("title")
.and_then(|t| t.get("runs"))
.and_then(|r| r.as_array())
.and_then(|a| a.first())
.and_then(|r| r.get("text"))
.and_then(|t| t.as_str())
{
info.name = s.into();
}
}
// stats[1] (video count) — "1,234 videos"
if let Some(stats) = primary.get("stats").and_then(|s| s.as_array()) {
if let Some(count_text) = stats
.get(0)
.and_then(|s| s.get("runs"))
.and_then(|r| r.as_array())
.and_then(|a| a.first())
.and_then(|r| r.get("text"))
.and_then(|t| t.as_str())
{
info.video_count = count_text
.replace(',', "")
.split_whitespace()
.next()
.and_then(|s| s.parse().ok())
.unwrap_or(-1);
}
}
}
if let Some(secondary) = item.get("playlistSidebarSecondaryInfoRenderer") {
if let Some(owner) = secondary.get("videoOwner").and_then(|o| {
o.get("videoOwnerRenderer")
}) {
if let Some(s) = owner
.get("title")
.and_then(|t| t.get("runs"))
.and_then(|r| r.as_array())
.and_then(|a| a.first())
{
if let Some(name) = s.get("text").and_then(|t| t.as_str()) {
info.uploader_name = name.into();
}
if let Some(endpoint) = s.get("navigationEndpoint") {
if let Some(browse_id) = endpoint
.get("browseEndpoint")
.and_then(|b| b.get("browseId"))
.and_then(|i| i.as_str())
{
info.uploader_id = browse_id.into();
info.uploader_url =
format!("https://www.youtube.com/channel/{browse_id}");
}
}
}
}
}
}
}
// contents.twoColumnBrowseResultsRenderer.tabs[0].tabRenderer.content
// .sectionListRenderer.contents[0].itemSectionRenderer.contents[0]
// .playlistVideoListRenderer.contents[]
let list_contents = body
.get("contents")
.and_then(|c| c.get("twoColumnBrowseResultsRenderer"))
.and_then(|c| c.get("tabs"))
.and_then(|t| t.as_array())
.and_then(|tabs| tabs.first())
.and_then(|t| t.get("tabRenderer"))
.and_then(|t| t.get("content"))
.and_then(|c| c.get("sectionListRenderer"))
.and_then(|s| s.get("contents"))
.and_then(|c| c.as_array())
.and_then(|arr| arr.first())
.and_then(|s| s.get("itemSectionRenderer"))
.and_then(|i| i.get("contents"))
.and_then(|c| c.as_array())
.and_then(|arr| arr.first())
.and_then(|s| s.get("playlistVideoListRenderer"))
.and_then(|p| p.get("contents"))
.and_then(|c| c.as_array());
if let Some(arr) = list_contents {
for item in arr {
if let Some(v) = item.get("playlistVideoRenderer") {
if let Some(s) = parse_playlist_video_renderer(v) {
info.videos.push(s);
}
} else if let Some(c) = item.get("continuationItemRenderer") {
info.continuation_token = c
.get("continuationEndpoint")
.and_then(|e| e.get("continuationCommand"))
.and_then(|c| c.get("token"))
.and_then(|t| t.as_str())
.map(String::from);
}
}
}
info
}
fn parse_playlist_video_renderer(renderer: &Value) -> Option<StreamInfoItem> {
let video_id = renderer.get("videoId")?.as_str()?.to_string();
let title = renderer
.get("title")
.and_then(|t| t.get("runs"))
.and_then(|r| r.as_array())
.and_then(|a| a.first())
.and_then(|r| r.get("text"))
.and_then(|t| t.as_str())
.unwrap_or("")
.to_string();
let uploader_name = renderer
.get("shortBylineText")
.and_then(|s| s.get("runs"))
.and_then(|r| r.as_array())
.and_then(|a| a.first())
.and_then(|r| r.get("text"))
.and_then(|t| t.as_str())
.unwrap_or("")
.to_string();
let duration_seconds = renderer
.get("lengthSeconds")
.and_then(|s| s.as_str())
.and_then(|s| s.parse().ok())
.unwrap_or(0);
Some(StreamInfoItem {
service_id: 0,
url: format!("https://www.youtube.com/watch?v={video_id}"),
name: title,
thumbnails: Vec::new(),
uploader_name,
uploader_url: String::new(),
uploader_id: String::new(),
uploader_verified: false,
duration_seconds,
view_count: -1,
upload_date_relative: String::new(),
stream_type: Some(crate::stream::StreamType::VideoStream),
short_description: String::new(),
})
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Title and description are read from `metadata.playlistMetadataRenderer`;
    /// the id and canonical URL are derived from the id argument.
    #[test]
    fn parses_basic_playlist_meta() {
        let response = json!({
            "metadata": {
                "playlistMetadataRenderer": {
                    "title": "Coding music",
                    "description": "For long sessions."
                }
            }
        });

        let parsed = parse_playlist_browse("PLxxx", &response);

        assert_eq!(parsed.name, "Coding music");
        assert_eq!(parsed.description, "For long sessions.");
        assert_eq!(parsed.playlist_id, "PLxxx");
        assert_eq!(parsed.url, "https://www.youtube.com/playlist?list=PLxxx");
    }

    /// One `playlistVideoRenderer` followed by a `continuationItemRenderer`
    /// yields one video plus the continuation token.
    #[test]
    fn parses_video_list_and_continuation() {
        let video_list = json!({
            "playlistVideoListRenderer": {
                "contents": [
                    {
                        "playlistVideoRenderer": {
                            "videoId": "abc",
                            "title": { "runs": [{ "text": "First track" }] },
                            "shortBylineText": { "runs": [{ "text": "NCS" }] },
                            "lengthSeconds": "234"
                        }
                    },
                    {
                        "continuationItemRenderer": {
                            "continuationEndpoint": {
                                "continuationCommand": { "token": "OPAQUE_CONT_TOKEN" }
                            }
                        }
                    }
                ]
            }
        });
        let response = json!({
            "contents": {
                "twoColumnBrowseResultsRenderer": {
                    "tabs": [{
                        "tabRenderer": {
                            "content": {
                                "sectionListRenderer": {
                                    "contents": [{
                                        "itemSectionRenderer": { "contents": [video_list] }
                                    }]
                                }
                            }
                        }
                    }]
                }
            }
        });

        let parsed = parse_playlist_browse("PLxxx", &response);

        assert_eq!(parsed.videos.len(), 1);
        let first = &parsed.videos[0];
        assert_eq!(first.name, "First track");
        assert_eq!(first.uploader_name, "NCS");
        assert_eq!(first.duration_seconds, 234);
        assert_eq!(
            parsed.continuation_token.as_deref(),
            Some("OPAQUE_CONT_TOKEN")
        );
    }
}

View file

@ -0,0 +1,448 @@
// YoutubeSearchExtractor — mirrors NPE
// services/youtube/extractors/YoutubeSearchExtractor.java.
//
// Calls /youtubei/v1/search with the WEB client (via desktop fast-path
// envelope). Body shape per audit Track D §3:
// {
// "context": { "client": { ... } },
// "query": "<query>",
// "params": "<filter base64>" // omitted for All
// }
//
// Response walked:
// contents.twoColumnSearchResultsRenderer.primaryContents
// .sectionListRenderer.contents[]
// .itemSectionRenderer.contents[]
// → videoRenderer | channelRenderer | playlistRenderer | shelfRenderer
//
// `shelfRenderer` is a sub-section (e.g. "People also watched") whose
// `content.verticalListRenderer.items[]` are the same renderer types.
use serde_json::Value;
use crate::downloader::request::Request;
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
use crate::image::{Image, ResolutionLevel};
use crate::newpipe::NewPipe;
use crate::stream::{StreamInfoItem, StreamType};
use crate::youtube::client_request::build_desktop_envelope;
use crate::youtube::constants::*;
use crate::youtube::linkhandler::search::SearchFilter;
use crate::youtube::parsing::{web_client_version, youtube_post_headers};
/// Result of one search request, as assembled by `parse_search_response`.
#[derive(Debug, Clone, Default)]
pub struct SearchInfo {
    /// The query string the caller searched for, stored verbatim.
    pub query: String,
    /// Text of `showingResultsForRenderer.correctedQuery` when the response
    /// carries one; `None` otherwise.
    pub corrected_query: Option<String>,
    /// Items parsed from `videoRenderer` entries (including those nested in
    /// a `shelfRenderer`), in response order.
    pub videos: Vec<StreamInfoItem>,
    /// Opaque token from a `continuationItemRenderer`, if the response had one.
    pub continuation_token: Option<String>,
}
/// Runs a search by POSTing to the `search` endpoint under `YOUTUBEI_V1_URL`
/// and parsing the JSON reply.
///
/// The request body is the desktop envelope plus a `query` field and, when
/// the filter supplies one, a `params` field.
///
/// # Errors
///
/// * `ExtractionError::Other` when the filter routes to the music endpoint,
///   which this function does not implement.
/// * `ExtractionError::DownloaderMissing` when no downloader is registered.
/// * `ExtractionError::Parsing` when the body cannot be serialized or the
///   response is not valid JSON.
/// * `ExtractionError::Network` when the HTTP status is not 200.
/// * Whatever error the downloader's `execute` reports.
pub fn search(query: &str, filter: SearchFilter) -> Result<SearchInfo, ExtractionError> {
    if filter.uses_music_endpoint() {
        return Err(ExtractionError::Other(
            "music search filters route to WEB_REMIX — not implemented in this phase".into(),
        ));
    }

    let downloader = NewPipe::downloader().ok_or(ExtractionError::DownloaderMissing)?;
    let localization = NewPipe::preferred_localization();
    let content_country = NewPipe::preferred_content_country();

    // Start from the shared desktop envelope and add the search-specific fields.
    let mut payload =
        build_desktop_envelope(&localization, &content_country, &web_client_version());
    if let Some(fields) = payload.as_object_mut() {
        fields.insert("query".into(), Value::String(query.into()));
        if let Some(params) = filter.params() {
            fields.insert("params".into(), Value::String(params.into()));
        }
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}search{DISABLE_PRETTY_PRINT_PARAM}");
    let raw_body = serde_json::to_vec(&payload).map_err(|err| {
        ExtractionError::Parsing(ParsingError::Invalid(format!("serialize: {err}")))
    })?;

    let mut request = Request::post(&endpoint, raw_body);
    for (header, value) in youtube_post_headers() {
        request = request.add_header(&header, &value);
    }

    let response = downloader.execute(request.build())?;
    let status = response.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "search HTTP {}",
            status
        ))));
    }

    let json: Value = serde_json::from_str(response.response_body())
        .map_err(|err| ExtractionError::Parsing(ParsingError::JsonShape(err.to_string())))?;
    Ok(parse_search_response(query, &json))
}
pub fn parse_search_response(query: &str, body: &Value) -> SearchInfo {
let mut info = SearchInfo {
query: query.to_string(),
..SearchInfo::default()
};
let primary = body
.get("contents")
.and_then(|c| c.get("twoColumnSearchResultsRenderer"))
.and_then(|c| c.get("primaryContents"))
.and_then(|c| c.get("sectionListRenderer"))
.and_then(|c| c.get("contents"));
if let Some(sections) = primary.and_then(|v| v.as_array()) {
for section in sections {
if let Some(items) = section
.get("itemSectionRenderer")
.and_then(|s| s.get("contents"))
.and_then(|c| c.as_array())
{
for item in items {
extract_item_into(item, &mut info);
}
}
if let Some(ct) = section
.get("continuationItemRenderer")
.and_then(|s| s.get("continuationEndpoint"))
.and_then(|c| c.get("continuationCommand"))
.and_then(|c| c.get("token"))
.and_then(|t| t.as_str())
{
info.continuation_token = Some(ct.to_string());
}
}
}
if let Some(corrected) = body
.get("contents")
.and_then(|c| c.get("twoColumnSearchResultsRenderer"))
.and_then(|c| c.get("primaryContents"))
.and_then(|c| c.get("sectionListRenderer"))
.and_then(|c| c.get("contents"))
.and_then(|c| c.as_array())
.and_then(|arr| {
arr.iter().find_map(|s| {
s.get("showingResultsForRenderer")
.and_then(|r| r.get("correctedQuery"))
.and_then(|q| q.get("runs"))
.and_then(|r| r.as_array())
.and_then(|a| a.first())
.and_then(|r| r.get("text"))
.and_then(|t| t.as_str())
})
})
{
info.corrected_query = Some(corrected.to_string());
}
info
}
fn extract_item_into(item: &Value, info: &mut SearchInfo) {
if let Some(video) = item.get("videoRenderer") {
if let Some(s) = parse_video_renderer(video) {
info.videos.push(s);
}
} else if let Some(shelf) = item.get("shelfRenderer") {
if let Some(items) = shelf
.get("content")
.and_then(|c| c.get("verticalListRenderer"))
.and_then(|v| v.get("items"))
.and_then(|i| i.as_array())
{
for inner in items {
extract_item_into(inner, info);
}
}
}
// channelRenderer and playlistRenderer parsing is intentionally
// omitted from Phase 6a — landed in Phase 6b along with channel/
// playlist extractors.
}
pub(crate) mod test_helpers {
use super::*;
pub fn video_renderer_to_item(renderer: &Value) -> Option<StreamInfoItem> {
super::parse_video_renderer(renderer)
}
}
/// Builds a [`StreamInfoItem`] from one search-result `videoRenderer`.
///
/// Returns `None` only when `videoId` is missing or not a string; every
/// other field degrades to a placeholder (`""`, `0`, `-1`, `false`, empty
/// list) when its source node is absent.
///
/// Field sources:
/// * name — `title`
/// * uploader name — `ownerText`, else `longBylineText`
/// * uploader URL / id — first run of `ownerText`
///   (`commandMetadata.webCommandMetadata.url` / `browseEndpoint.browseId`)
/// * duration — `lengthText.simpleText` via `parse_duration_string`
/// * view count — `viewCountText.simpleText`, else
///   `shortViewCountText.simpleText`, via `parse_view_count`
/// * relative upload date — `publishedTimeText.simpleText`
/// * stream type — live when any `badges[].metadataBadgeRenderer.label`
///   equals "live" ignoring ASCII case
/// * short description — `detailedMetadataSnippets[0].snippetText`, else
///   `detailedMetadataSnippets` read directly as a text object, else
///   `descriptionSnippet`
/// * verified flag — any `ownerBadges[].metadataBadgeRenderer.style` equal to
///   `BADGE_STYLE_TYPE_VERIFIED` or `BADGE_STYLE_TYPE_VERIFIED_ARTIST`
fn parse_video_renderer(renderer: &Value) -> Option<StreamInfoItem> {
    let video_id = renderer.get("videoId")?.as_str()?.to_string();
    let title = runs_text(renderer.get("title"));
    let uploader_name = runs_text(renderer.get("ownerText"))
        .or_else(|| runs_text(renderer.get("longBylineText")))
        .unwrap_or_default();

    let uploader_endpoint = renderer
        .get("ownerText")
        .and_then(|o| o.get("runs"))
        .and_then(|r| r.as_array())
        .and_then(|a| a.first())
        .and_then(|r| r.get("navigationEndpoint"));
    let uploader_url = uploader_endpoint
        .and_then(|e| e.get("commandMetadata"))
        .and_then(|m| m.get("webCommandMetadata"))
        .and_then(|w| w.get("url"))
        .and_then(|u| u.as_str())
        .map(|p| format!("https://www.youtube.com{p}"))
        .unwrap_or_default();
    let uploader_id = uploader_endpoint
        .and_then(|e| e.get("browseEndpoint"))
        .and_then(|b| b.get("browseId"))
        .and_then(|i| i.as_str())
        .unwrap_or("")
        .to_string();

    let duration_seconds = renderer
        .get("lengthText")
        .and_then(|l| l.get("simpleText"))
        .and_then(|s| s.as_str())
        .map(parse_duration_string)
        .unwrap_or(0);
    let view_count = renderer
        .get("viewCountText")
        .and_then(|c| c.get("simpleText"))
        .and_then(|s| s.as_str())
        .or_else(|| {
            renderer
                .get("shortViewCountText")
                .and_then(|c| c.get("simpleText"))
                .and_then(|s| s.as_str())
        })
        .map(parse_view_count)
        .unwrap_or(-1);
    let upload_relative = renderer
        .get("publishedTimeText")
        .and_then(|p| p.get("simpleText"))
        .and_then(|s| s.as_str())
        .unwrap_or("")
        .to_string();

    let is_live = renderer
        .get("badges")
        .and_then(|b| b.as_array())
        .map(|arr| {
            arr.iter().any(|b| {
                b.get("metadataBadgeRenderer")
                    .and_then(|m| m.get("label"))
                    .and_then(|l| l.as_str())
                    .map(|s| s.eq_ignore_ascii_case("live"))
                    .unwrap_or(false)
            })
        })
        .unwrap_or(false);
    let stream_type = if is_live {
        StreamType::VideoLiveStream
    } else {
        StreamType::VideoStream
    };

    // `detailedMetadataSnippets` is an array whose entries wrap the text in
    // `snippetText`; handing the array itself to `runs_text` always yields
    // `None`. Read the first entry's `snippetText` first, and keep the
    // previous lookups as fallbacks.
    let short_description = renderer
        .get("detailedMetadataSnippets")
        .and_then(|snippets| snippets.get(0))
        .and_then(|first| runs_text(first.get("snippetText")))
        .or_else(|| runs_text(renderer.get("detailedMetadataSnippets")))
        .or_else(|| runs_text(renderer.get("descriptionSnippet")))
        .unwrap_or_default();

    let mut thumbnails = Vec::new();
    if let Some(arr) = renderer
        .get("thumbnail")
        .and_then(|t| t.get("thumbnails"))
        .and_then(|t| t.as_array())
    {
        for t in arr {
            if let Some(url) = t.get("url").and_then(|v| v.as_str()) {
                // Missing dimensions are reported as -1.
                let h = t.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
                let w = t.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
                thumbnails.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
            }
        }
    }

    let uploader_verified = renderer
        .get("ownerBadges")
        .and_then(|b| b.as_array())
        .map(|arr| {
            arr.iter().any(|b| {
                b.get("metadataBadgeRenderer")
                    .and_then(|m| m.get("style"))
                    .and_then(|s| s.as_str())
                    .map(|s| {
                        s == "BADGE_STYLE_TYPE_VERIFIED" || s == "BADGE_STYLE_TYPE_VERIFIED_ARTIST"
                    })
                    .unwrap_or(false)
            })
        })
        .unwrap_or(false);

    Some(StreamInfoItem {
        service_id: 0,
        url: format!("https://www.youtube.com/watch?v={video_id}"),
        name: title.unwrap_or_default(),
        thumbnails,
        uploader_name,
        uploader_url,
        uploader_id,
        uploader_verified,
        duration_seconds,
        view_count,
        upload_date_relative: upload_relative,
        stream_type: Some(stream_type),
        short_description,
    })
}
fn runs_text(value: Option<&Value>) -> Option<String> {
let v = value?;
if let Some(s) = v.get("simpleText").and_then(|s| s.as_str()) {
return Some(s.to_string());
}
if let Some(arr) = v.get("runs").and_then(|r| r.as_array()) {
let joined: String = arr
.iter()
.filter_map(|r| r.get("text").and_then(|t| t.as_str()))
.collect();
if !joined.is_empty() {
return Some(joined);
}
}
None
}
/// Converts a colon-separated duration ("M:SS", "MM:SS", "H:MM:SS",
/// "HH:MM:SS") into a number of seconds.
///
/// Each field is trimmed and read as an integer; a field that does not parse
/// counts as 0. Fields are combined left to right in base 60.
fn parse_duration_string(s: &str) -> i64 {
    s.split(':')
        .map(|field| field.trim().parse::<i64>().unwrap_or(0))
        .fold(0, |seconds, field| seconds * 60 + field)
}
/// Parses an English view-count label into a number of views.
///
/// Accepted forms, after commas and no-break spaces are removed and the
/// text is trimmed:
///
/// * `"1,234,567 views"` → `1_234_567`
/// * `"42K views"`, `"1.2M views"`, `"3B views"` → scaled by 10^3 / 10^6 / 10^9
/// * `"No views"` (any ASCII case) → `0`
/// * a bare number such as `"1234"` → that number
///
/// Returns `-1` when the text is not recognised. That includes the singular
/// `"1 view"`, which is left unrecognised because the unit tests in this
/// file pin that result. Values that parse as a float but are not a usable
/// count (`inf`, `NaN`, negatives) are also reported as `-1` rather than
/// being cast to an arbitrary integer. The scaled value is rounded to the
/// nearest integer so a binary-float product just below a whole number is
/// not truncated one short.
fn parse_view_count(s: &str) -> i64 {
    // Checked in order: the abbreviated suffixes must be tried before the
    // plain " views" suffix, which would otherwise leave the K/M/B letter
    // attached to the number.
    const SUFFIXES: [(&str, f64); 4] = [
        ("K views", 1_000.0),
        ("M views", 1_000_000.0),
        ("B views", 1_000_000_000.0),
        (" views", 1.0),
    ];

    let cleaned = s.replace([',', '\u{00a0}'], "");
    let cleaned = cleaned.trim();

    if cleaned.eq_ignore_ascii_case("no views") {
        return 0;
    }

    let (num_part, mult) = SUFFIXES
        .iter()
        .find_map(|&(suffix, mult)| cleaned.strip_suffix(suffix).map(|rest| (rest.trim(), mult)))
        .unwrap_or((cleaned, 1.0));

    num_part
        .parse::<f64>()
        .ok()
        // `f64::from_str` accepts "inf"/"nan" and signed values; none of
        // those is a valid view count.
        .filter(|n| n.is_finite() && *n >= 0.0)
        .map(|n| (n * mult).round() as i64)
        .unwrap_or(-1)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Wraps `sections` in the envelope that `parse_search_response` walks.
    fn search_body(sections: Value) -> Value {
        json!({
            "contents": {
                "twoColumnSearchResultsRenderer": {
                    "primaryContents": {
                        "sectionListRenderer": { "contents": sections }
                    }
                }
            }
        })
    }

    #[test]
    fn duration_parsing() {
        let cases = [
            ("0:42", 42),
            ("3:14", 194),
            ("1:02:03", 3723),
            ("10:00:00", 36000),
        ];
        for (text, seconds) in cases {
            assert_eq!(parse_duration_string(text), seconds);
        }
    }

    #[test]
    fn view_count_parsing() {
        assert_eq!(parse_view_count("1,234,567 views"), 1_234_567);
        assert_eq!(parse_view_count("42K views"), 42_000);
        assert_eq!(parse_view_count("1.5M views"), 1_500_000);
        assert_eq!(parse_view_count("3B views"), 3_000_000_000);
        // Singular "view" is not the plural "views" suffix, so it is not
        // recognised — NPE accepts both.
        assert_eq!(parse_view_count("1 view"), -1);
    }

    #[test]
    fn runs_text_joins_runs() {
        let text = json!({ "runs": [{ "text": "Hello, " }, { "text": "world" }] });
        assert_eq!(runs_text(Some(&text)).as_deref(), Some("Hello, world"));
    }

    #[test]
    fn runs_text_handles_simple() {
        let text = json!({ "simpleText": "just text" });
        assert_eq!(runs_text(Some(&text)).as_deref(), Some("just text"));
    }

    #[test]
    fn parses_one_video_renderer_in_section() {
        let response = search_body(json!([{
            "itemSectionRenderer": {
                "contents": [{
                    "videoRenderer": {
                        "videoId": "n4tK7LYFxI0",
                        "title": { "runs": [{ "text": "Spektrem - Shine" }] },
                        "ownerText": { "runs": [{ "text": "NoCopyrightSounds" }] },
                        "lengthText": { "simpleText": "3:54" },
                        "viewCountText": { "simpleText": "42,000,000 views" },
                        "publishedTimeText": { "simpleText": "8 years ago" }
                    }
                }]
            }
        }]));

        let parsed = parse_search_response("Spektrem", &response);

        assert_eq!(parsed.videos.len(), 1);
        let video = &parsed.videos[0];
        assert_eq!(video.name, "Spektrem - Shine");
        assert_eq!(video.uploader_name, "NoCopyrightSounds");
        assert_eq!(video.duration_seconds, 234);
        assert_eq!(video.view_count, 42_000_000);
        assert_eq!(video.upload_date_relative, "8 years ago");
        assert_eq!(video.url, "https://www.youtube.com/watch?v=n4tK7LYFxI0");
    }

    #[test]
    fn parses_continuation_token() {
        let response = search_body(json!([{
            "continuationItemRenderer": {
                "continuationEndpoint": {
                    "continuationCommand": { "token": "OPAQUE_TOKEN_XYZ" }
                }
            }
        }]));

        let parsed = parse_search_response("x", &response);

        assert_eq!(
            parsed.continuation_token.as_deref(),
            Some("OPAQUE_TOKEN_XYZ")
        );
    }

    #[test]
    fn parses_corrected_query_hint() {
        let response = search_body(json!([{
            "showingResultsForRenderer": {
                "correctedQuery": { "runs": [{ "text": "spektrem" }] }
            }
        }]));

        let parsed = parse_search_response("spektram", &response);

        assert_eq!(parsed.corrected_query.as_deref(), Some("spektrem"));
    }

    #[test]
    fn shelf_renderer_is_walked() {
        let response = search_body(json!([{
            "itemSectionRenderer": {
                "contents": [{
                    "shelfRenderer": {
                        "content": {
                            "verticalListRenderer": {
                                "items": [{
                                    "videoRenderer": {
                                        "videoId": "AAAAAAAAAA1",
                                        "title": { "simpleText": "In shelf" }
                                    }
                                }]
                            }
                        }
                    }
                }]
            }
        }]));

        let parsed = parse_search_response("x", &response);

        assert_eq!(parsed.videos.len(), 1);
        assert_eq!(parsed.videos[0].name, "In shelf");
    }
}

View file

@ -0,0 +1,91 @@
// YoutubeSuggestionExtractor — search-as-you-type autocomplete.
// Mirrors NPE services/youtube/extractors/YoutubeSuggestionExtractor.java.
//
// Endpoint:
// GET https://suggestqueries-clients6.youtube.com/complete/search
// ?client=youtube&ds=yt&gl=<cc>&q=<query>&xhr=t
//
// Returns a JSON array shaped like: `[query, [[suggestion, 0], ...], {}]`.
// The XSSI prefix `)]}'\n` may NOT be present — NPE handles both cases.
use serde_json::Value;
use url::form_urlencoded;
use crate::downloader::request::Request;
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
use crate::newpipe::NewPipe;
/// Fetches search-as-you-type suggestions for `query`.
///
/// Issues a GET to the `suggestqueries-clients6` completion endpoint with
/// the preferred content country as `gl`, strips the `)]}'\n` XSSI prefix
/// when the body starts with it, and hands the decoded JSON to
/// [`parse_suggestions`].
///
/// # Errors
///
/// * `ExtractionError::DownloaderMissing` when no downloader is registered.
/// * `ExtractionError::Network` when the HTTP status is not 200.
/// * `ExtractionError::Parsing` when the body is not valid JSON.
/// * Whatever error the downloader's `execute` reports.
pub fn suggestions(query: &str) -> Result<Vec<String>, ExtractionError> {
    let downloader = NewPipe::downloader().ok_or(ExtractionError::DownloaderMissing)?;
    let country = NewPipe::preferred_content_country();

    let mut params = form_urlencoded::Serializer::new(String::new());
    params.append_pair("client", "youtube");
    params.append_pair("ds", "yt");
    params.append_pair("gl", country.country_code());
    params.append_pair("q", query);
    params.append_pair("xhr", "t");
    let query_string: String = params.finish();

    let endpoint =
        format!("https://suggestqueries-clients6.youtube.com/complete/search?{query_string}");
    let response = downloader.execute(Request::get(&endpoint).build())?;

    let status = response.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "suggest HTTP {}",
            status
        ))));
    }

    // The endpoint may or may not prepend the XSSI guard; accept both.
    let raw = response.response_body();
    let json_text = match raw.strip_prefix(")]}'\n") {
        Some(rest) => rest,
        None => raw,
    };

    let decoded: Value = serde_json::from_str(json_text)
        .map_err(|err| ExtractionError::Parsing(ParsingError::JsonShape(err.to_string())))?;
    Ok(parse_suggestions(&decoded))
}
pub fn parse_suggestions(value: &Value) -> Vec<String> {
value
.as_array()
.and_then(|outer| outer.get(1))
.and_then(|inner| inner.as_array())
.map(|arr| {
arr.iter()
.filter_map(|entry| {
entry.as_array().and_then(|e| e.first()).and_then(|s| s.as_str())
})
.map(String::from)
.collect()
})
.unwrap_or_default()
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The usual `[query, [[text, 0], ...], {}]` shape yields the texts in order.
    #[test]
    fn parses_typical_suggest_response() {
        let response = json!([
            "spek",
            [
                ["spektrem", 0],
                ["spektrum", 0],
                ["spek tek", 0]
            ],
            {}
        ]);

        let found = parse_suggestions(&response);

        assert_eq!(found, vec!["spektrem", "spektrum", "spek tek"]);
    }

    /// An empty inner array yields no suggestions.
    #[test]
    fn empty_suggestions_array() {
        let response = json!(["q", []]);
        assert!(parse_suggestions(&response).is_empty());
    }

    /// A non-array document is tolerated and yields no suggestions.
    #[test]
    fn handles_malformed() {
        let response = json!({});
        assert!(parse_suggestions(&response).is_empty());
    }
}