// strawcore/src/youtube/channel.rs

// YoutubeChannelExtractor + helper.resolveChannelId — fetches channel
// info via /youtubei/v1/browse. Mirrors NPE
// services/youtube/extractors/YoutubeChannelExtractor.java +
// YoutubeChannelHelper.java.
//
// Handle / custom URL / legacy user resolution: NPE issues
// `/youtubei/v1/navigation/resolve_url` against the `youtube.com/@handle`
// URL, walks `endpoint.browseEndpoint.browseId` to get the UC... id, and
// retries the browse call. Up to 3 redirect hops.
//
// Tab parsing (videos/shorts/live/playlists) is in audit Track D §5 —
// `tabs[].tabRenderer.endpoint.browseEndpoint.params` is the magic
// base64 needed to land on each tab.

use serde_json::Value;

use crate::downloader::request::Request;
use crate::exceptions::{ExtractionError, NetworkError, ParsingError};
use crate::image::{Image, ImageSet, ResolutionLevel};
use crate::newpipe::NewPipe;
use crate::stream::StreamInfoItem;
use crate::youtube::client_request::build_desktop_envelope;
use crate::youtube::constants::*;
use crate::youtube::linkhandler::channel::ChannelIdentifier;
use crate::youtube::parsing::{web_client_version, youtube_post_headers};

/// Channel header metadata plus the first page of the channel's uploads.
///
/// The header/metadata fields are filled by `parse_channel_browse`; the
/// video list and continuation token are filled by `fetch_channel_browse`
/// from a second browse request. Every field starts at its `Default` value
/// and is only overwritten when the corresponding JSON is present.
#[derive(Clone, Debug, Default)]
pub struct ChannelInfo {
    /// Channel id exactly as it was passed to the parser.
    pub channel_id: String,
    /// `https://www.youtube.com/channel/{channel_id}`.
    pub url: String,
    /// Header title (`title` or `pageTitle`); empty when no known header exists.
    pub name: String,
    /// `channelMetadataRenderer.description`; empty when absent.
    pub description: String,
    /// Avatar images from the header, backfilled from the metadata renderer.
    pub avatars: ImageSet,
    /// Banner images; only the `c4TabbedHeaderRenderer` header populates this.
    pub banners: ImageSet,
    /// Parsed `subscriberCountText`. Stays 0 when the header carries no such
    /// text and is -1 when the text is present but cannot be parsed.
    pub subscriber_count: i64,
    /// True when any header badge style starts with `BADGE_STYLE_TYPE_VERIFIED`.
    pub verified: bool,
    /// Items parsed from the Videos tab; empty when that request fails.
    pub recent_videos: Vec<StreamInfoItem>,
    /// Continuation token found in the Videos tab grid, if any.
    pub videos_continuation: Option<String>,
}

/// Fetches channel info for any supported kind of channel identifier.
///
/// A direct channel id is browsed as-is. Handles, custom URLs and legacy
/// user names are first turned into a `youtube.com/...` path fragment and
/// resolved to a channel id via [`resolve_handle_to_channel_id`].
///
/// # Errors
/// Propagates any error from the resolve step or from
/// [`fetch_channel_browse`].
pub fn channel_info(identifier: ChannelIdentifier) -> Result<ChannelInfo, ExtractionError> {
    let path_fragment = match identifier {
        // Already a channel id — nothing to resolve.
        ChannelIdentifier::DirectId(id) => return fetch_channel_browse(&id),
        ChannelIdentifier::Handle(handle) => format!("@{handle}"),
        ChannelIdentifier::Custom(custom) => format!("c/{custom}"),
        ChannelIdentifier::LegacyUser(user) => format!("user/{user}"),
    };
    let channel_id = resolve_handle_to_channel_id(&path_fragment)?;
    fetch_channel_browse(&channel_id)
}

/// Resolves a `youtube.com/<url_fragment>` URL to the channel's browse id.
///
/// Sends one `navigation/resolve_url` POST and reads
/// `endpoint.browseEndpoint.browseId` from the response. This function
/// issues a single request; it does not follow further redirects.
///
/// # Errors
/// * `ExtractionError::DownloaderMissing` when no downloader is registered.
/// * `ParsingError::Invalid` when the request body cannot be serialized.
/// * `NetworkError::Transport` when the HTTP status is not 200.
/// * `ParsingError::JsonShape` when the response is not valid JSON.
/// * `ParsingError::MissingField` when the browse id is absent.
/// * Whatever the downloader's `execute` returns on failure.
pub fn resolve_handle_to_channel_id(url_fragment: &str) -> Result<String, ExtractionError> {
    let Some(downloader) = NewPipe::downloader() else {
        return Err(ExtractionError::DownloaderMissing);
    };
    let locale = NewPipe::preferred_localization();
    let country = NewPipe::preferred_content_country();

    // Desktop client envelope plus the URL we want resolved.
    let mut payload = build_desktop_envelope(&locale, &country, &web_client_version());
    if let Some(fields) = payload.as_object_mut() {
        fields.insert(
            String::from("url"),
            Value::String(format!("https://www.youtube.com/{url_fragment}")),
        );
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}navigation/resolve_url{DISABLE_PRETTY_PRINT_PARAM}");
    let body = match serde_json::to_vec(&payload) {
        Ok(bytes) => bytes,
        Err(e) => {
            return Err(ExtractionError::Parsing(ParsingError::Invalid(format!(
                "serialize: {e}"
            ))))
        }
    };

    let builder = Request::post(&endpoint, body);
    let request = youtube_post_headers()
        .into_iter()
        .fold(builder, |acc, (key, value)| acc.add_header(&key, &value))
        .build();

    let resp = downloader.execute(request)?;
    let status = resp.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "resolve_url HTTP {}",
            status
        ))));
    }

    let parsed: Value = serde_json::from_str(resp.response_body())
        .map_err(|e| ExtractionError::Parsing(ParsingError::JsonShape(e.to_string())))?;

    match parsed
        .pointer("/endpoint/browseEndpoint/browseId")
        .and_then(Value::as_str)
    {
        Some(browse_id) => Ok(browse_id.to_owned()),
        None => Err(ExtractionError::Parsing(ParsingError::MissingField(
            "endpoint.browseEndpoint.browseId".into(),
        ))),
    }
}

/// Magic params for the channel "Videos" tab — opaque base64. Same constant
/// NPE uses (audit Track A §2.4). Sending it with the channel browseId
/// switches YT's response from the Home tab to the Videos tab.
///
/// The decoded payload embeds the ASCII tab name `videos`; the remaining
/// bytes are treated as opaque here and must be sent unchanged.
const CHANNEL_VIDEOS_TAB_PARAMS: &str = "EgZ2aWRlb3PyBgQKAjoA";

/// Fetches a channel's header info and, best-effort, its recent videos.
///
/// Two browse requests are made:
/// 1. without params (Home tab) — supplies header and metadata; a failure
///    here is returned to the caller;
/// 2. with [`CHANNEL_VIDEOS_TAB_PARAMS`] (Videos tab) — supplies
///    `recent_videos` and `videos_continuation`; a failure here is ignored
///    and the header-only info is returned.
///
/// # Errors
/// Only errors from the first browse request are propagated.
pub fn fetch_channel_browse(channel_id: &str) -> Result<ChannelInfo, ExtractionError> {
    let home = fetch_browse(channel_id, None)?;
    let mut info = parse_channel_browse(channel_id, &home);

    // Videos tab is optional: on any error keep what the Home tab gave us.
    let Ok(videos) = fetch_browse(channel_id, Some(CHANNEL_VIDEOS_TAB_PARAMS)) else {
        return Ok(info);
    };

    info.recent_videos = parse_videos_tab(&videos);
    let continuation = parse_videos_continuation(&videos);
    // Never overwrite with `None`; only store a token that was found.
    if continuation.is_some() {
        info.videos_continuation = continuation;
    }
    Ok(info)
}

/// POSTs one `/youtubei/v1/browse` request and returns the parsed JSON body.
///
/// `channel_id` is sent as `browseId`; `params`, when given, is sent as the
/// `params` field (used to select a specific tab).
///
/// # Errors
/// * `ExtractionError::DownloaderMissing` when no downloader is registered.
/// * `ParsingError::Invalid` when the request body cannot be serialized.
/// * `NetworkError::Transport` when the HTTP status is not 200.
/// * `ParsingError::JsonShape` when the response is not valid JSON.
/// * Whatever the downloader's `execute` returns on failure.
fn fetch_browse(channel_id: &str, params: Option<&str>) -> Result<Value, ExtractionError> {
    let Some(downloader) = NewPipe::downloader() else {
        return Err(ExtractionError::DownloaderMissing);
    };
    let locale = NewPipe::preferred_localization();
    let country = NewPipe::preferred_content_country();

    let mut payload = build_desktop_envelope(&locale, &country, &web_client_version());
    if let Some(fields) = payload.as_object_mut() {
        fields.insert(String::from("browseId"), Value::String(channel_id.to_owned()));
        if let Some(tab_params) = params {
            fields.insert(String::from("params"), Value::String(tab_params.to_owned()));
        }
    }

    let endpoint = format!("{YOUTUBEI_V1_URL}browse{DISABLE_PRETTY_PRINT_PARAM}");
    let body = match serde_json::to_vec(&payload) {
        Ok(bytes) => bytes,
        Err(e) => {
            return Err(ExtractionError::Parsing(ParsingError::Invalid(format!(
                "serialize: {e}"
            ))))
        }
    };

    let builder = Request::post(&endpoint, body);
    let request = youtube_post_headers()
        .into_iter()
        .fold(builder, |acc, (key, value)| acc.add_header(&key, &value))
        .build();

    let resp = downloader.execute(request)?;
    let status = resp.response_code();
    if status != 200 {
        return Err(ExtractionError::Network(NetworkError::Transport(format!(
            "browse HTTP {}",
            status
        ))));
    }

    match serde_json::from_str(resp.response_body()) {
        Ok(json) => Ok(json),
        Err(e) => Err(ExtractionError::Parsing(ParsingError::JsonShape(
            e.to_string(),
        ))),
    }
}

pub fn parse_channel_browse(channel_id: &str, body: &Value) -> ChannelInfo {
    let mut info = ChannelInfo {
        channel_id: channel_id.to_string(),
        url: format!("https://www.youtube.com/channel/{channel_id}"),
        ..ChannelInfo::default()
    };

    // C4_TABBED header flavor is the most common.
    if let Some(header) = body
        .get("header")
        .and_then(|h| h.get("c4TabbedHeaderRenderer"))
    {
        if let Some(s) = header.get("title").and_then(|t| t.as_str()) {
            info.name = s.to_string();
        }
        info.avatars = parse_image_set(header.get("avatar"));
        info.banners = parse_image_set(header.get("banner"));
        if let Some(text) = header
            .get("subscriberCountText")
            .and_then(|s| s.get("simpleText"))
            .and_then(|s| s.as_str())
        {
            info.subscriber_count = parse_subscriber_count(text);
        }
        if let Some(badges) = header.get("badges").and_then(|b| b.as_array()) {
            info.verified = badges.iter().any(|b| {
                b.get("metadataBadgeRenderer")
                    .and_then(|m| m.get("style"))
                    .and_then(|s| s.as_str())
                    .map(|s| s.starts_with("BADGE_STYLE_TYPE_VERIFIED"))
                    .unwrap_or(false)
            });
        }
    }
    // Alternative pageHeaderRenderer (newer flavor — 2025+).
    // Avatar nests deep under decoratedAvatarViewModel.avatar.avatarViewModel
    // .image.sources[]; banner under contentBanner...image.sources[].
    // YT keeps adding wrappers in this branch — walk all the known
    // intermediates and parse the first sources[] we find.
    else if let Some(header) = body
        .get("header")
        .and_then(|h| h.get("pageHeaderRenderer"))
    {
        if let Some(s) = header.get("pageTitle").and_then(|t| t.as_str()) {
            info.name = s.to_string();
        }
        if info.avatars.is_empty() {
            info.avatars = parse_page_header_avatar(header);
        }
    }

    // microformat / description / avatar fallback. metadata.channel
    // MetadataRenderer.avatar.thumbnails[] is the most reliable avatar
    // path — present whether the header is c4TabbedHeaderRenderer or
    // pageHeaderRenderer. Use it as a last-resort backfill so newer
    // channel layouts that don't expose the avatar in the header still
    // give us SOMETHING.
    let metadata = body
        .get("metadata")
        .and_then(|m| m.get("channelMetadataRenderer"));
    if let Some(m) = metadata {
        if let Some(desc) = m.get("description").and_then(|d| d.as_str()) {
            info.description = desc.to_string();
        }
        if info.avatars.is_empty() {
            info.avatars = parse_image_set(m.get("avatar"));
        }
    }

    // Note: recent_videos are populated by a separate second browse to
    // the Videos tab — see fetch_channel_browse. The first browse's Home
    // tab does NOT contain a clean video grid in current YT.
    info
}

/// Walk the Videos-tab browse response into a list of StreamInfoItems.
/// Handles BOTH old-style `videoRenderer` items and new-style
/// `lockupViewModel` items (YT migrated channel-videos UI to
/// lockupViewModel around 2024).
fn parse_videos_tab(body: &Value) -> Vec<StreamInfoItem> {
    let mut out = Vec::new();
    let tabs = body
        .get("contents")
        .and_then(|c| c.get("twoColumnBrowseResultsRenderer"))
        .and_then(|c| c.get("tabs"))
        .and_then(|t| t.as_array());
    let Some(tabs) = tabs else { return out };

    for tab in tabs {
        let Some(tr) = tab.get("tabRenderer") else { continue };
        if !tr
            .get("selected")
            .and_then(|s| s.as_bool())
            .unwrap_or(false)
        {
            continue;
        }
        let Some(items) = tr
            .get("content")
            .and_then(|c| c.get("richGridRenderer"))
            .and_then(|g| g.get("contents"))
            .and_then(|c| c.as_array())
        else {
            continue;
        };
        for cell in items {
            // richItemRenderer carries either videoRenderer (legacy) or
            // lockupViewModel (current 2026 YT).
            let Some(content) = cell
                .get("richItemRenderer")
                .and_then(|r| r.get("content"))
            else {
                continue;
            };
            if let Some(vr) = content.get("videoRenderer") {
                if let Some(item) = crate::youtube::search_extractor::renderer_helpers::video_renderer_to_item(vr) {
                    out.push(item);
                }
            } else if let Some(lvm) = content.get("lockupViewModel") {
                if let Some(item) = parse_lockup_video(lvm) {
                    out.push(item);
                }
            }
        }
    }
    out
}

fn parse_videos_continuation(body: &Value) -> Option<String> {
    let tabs = body
        .get("contents")
        .and_then(|c| c.get("twoColumnBrowseResultsRenderer"))
        .and_then(|c| c.get("tabs"))
        .and_then(|t| t.as_array())?;
    for tab in tabs {
        let Some(tr) = tab.get("tabRenderer") else { continue };
        if !tr.get("selected").and_then(|s| s.as_bool()).unwrap_or(false) {
            continue;
        }
        let items = tr
            .get("content")
            .and_then(|c| c.get("richGridRenderer"))
            .and_then(|g| g.get("contents"))
            .and_then(|c| c.as_array())?;
        for cell in items {
            if let Some(token) = cell
                .get("continuationItemRenderer")
                .and_then(|s| s.get("continuationEndpoint"))
                .and_then(|c| c.get("continuationCommand"))
                .and_then(|c| c.get("token"))
                .and_then(|t| t.as_str())
            {
                return Some(token.to_string());
            }
        }
    }
    None
}

/// Converts one `lockupViewModel` grid cell into a `StreamInfoItem`.
///
/// Returns `None` unless all of these hold:
/// * `contentType` is exactly `LOCKUP_CONTENT_TYPE_VIDEO`;
/// * `contentId` is a string that is 11 bytes long;
/// * `metadata.lockupMetadataViewModel` exists.
///
/// Everything else is best-effort: a missing title becomes an empty string,
/// `view_count` stays -1 until a "view" text part yields a non-negative
/// number, `duration_seconds` stays 0 when no clock-style badge is found,
/// and `thumbnails` may be empty. The uploader URL/id/verified fields are
/// never filled here.
fn parse_lockup_video(lvm: &Value) -> Option<StreamInfoItem> {
    // lockupViewModel only carries videos when contentType says so. Skip
    // playlists, shorts collections, channel-redirects, etc.
    let content_type = lvm.get("contentType").and_then(|v| v.as_str()).unwrap_or("");
    if content_type != "LOCKUP_CONTENT_TYPE_VIDEO" {
        return None;
    }
    let video_id = lvm.get("contentId").and_then(|v| v.as_str())?.to_string();
    // Length check only — the characters themselves are not validated.
    if video_id.len() != 11 {
        return None;
    }

    let lockup_md = lvm
        .get("metadata")
        .and_then(|m| m.get("lockupMetadataViewModel"))?;
    let title = lockup_md
        .get("title")
        .and_then(|t| t.get("content"))
        .and_then(|v| v.as_str())
        .unwrap_or("")
        .to_string();

    // metadataRows[0] = ["1.1m views", "2 years ago"]. metadataRows[1] is
    // sometimes uploader name (when shown on home/search lockups) but on
    // a channel's own Videos tab it's not present (we know the channel).
    //
    // Every text part of every row is classified by English keywords, and
    // the first match per category wins. The branch order below matters.
    let mut view_count = -1i64;
    let mut upload_relative = String::new();
    let mut uploader_name = String::new();
    if let Some(cmv) = lockup_md
        .get("metadata")
        .and_then(|m| m.get("contentMetadataViewModel"))
    {
        if let Some(rows) = cmv.get("metadataRows").and_then(|r| r.as_array()) {
            for row in rows {
                let Some(parts) = row.get("metadataParts").and_then(|p| p.as_array()) else {
                    continue;
                };
                for part in parts {
                    let Some(txt) = part
                        .get("text")
                        .and_then(|t| t.get("content"))
                        .and_then(|v| v.as_str())
                    else {
                        continue;
                    };
                    let lc = txt.to_ascii_lowercase();
                    // 1) View count: retried on later parts while still negative.
                    if lc.contains("view") && view_count < 0 {
                        view_count = parse_lockup_view_count(txt);
                    // 2) Relative upload time: first part that looks like one.
                    } else if (lc.contains("ago")
                        || lc.contains("hour")
                        || lc.contains("minute")
                        || lc.contains("yesterday")
                        || lc.contains("days")
                        || lc.contains("weeks")
                        || lc.contains("months")
                        || lc.contains("years"))
                        && upload_relative.is_empty()
                    {
                        upload_relative = txt.to_string();
                    // 3) Anything else becomes the uploader name.
                    // NOTE(review): only "view" and "ago" are excluded here, so
                    // a second time-like part without "ago" (e.g. containing
                    // "hour") would be stored as the uploader name — confirm
                    // whether that can occur in real responses.
                    } else if uploader_name.is_empty()
                        && !lc.contains("view")
                        && !lc.contains("ago")
                    {
                        uploader_name = txt.to_string();
                    }
                }
            }
        }
    }

    // duration text lives in a thumbnail overlay badge ("3:14:08").
    // Only badges containing ':' are considered, and scanning continues
    // while the parsed duration is still 0.
    let mut duration_seconds = 0i64;
    if let Some(overlays) = lvm
        .get("contentImage")
        .and_then(|c| c.get("thumbnailViewModel"))
        .and_then(|t| t.get("overlays"))
        .and_then(|o| o.as_array())
    {
        for ov in overlays {
            if let Some(badges) = ov
                .get("thumbnailBottomOverlayViewModel")
                .and_then(|b| b.get("badges"))
                .and_then(|b| b.as_array())
            {
                for b in badges {
                    if let Some(txt) = b
                        .get("thumbnailBadgeViewModel")
                        .and_then(|m| m.get("text"))
                        .and_then(|v| v.as_str())
                    {
                        if txt.contains(':') && duration_seconds == 0 {
                            duration_seconds = parse_duration_clock(txt);
                        }
                    }
                }
            }
        }
    }

    // thumbnails — sources array, kept in response order (assumed to be
    // pre-sorted ascending by size; no sorting is done here).
    // Missing width/height become -1.
    // NOTE(review): `as i32` silently truncates out-of-range dimensions.
    let mut thumbnails = Vec::new();
    if let Some(sources) = lvm
        .get("contentImage")
        .and_then(|c| c.get("thumbnailViewModel"))
        .and_then(|t| t.get("image"))
        .and_then(|i| i.get("sources"))
        .and_then(|s| s.as_array())
    {
        for src in sources {
            if let Some(url) = src.get("url").and_then(|v| v.as_str()) {
                let h = src.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
                let w = src.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
                thumbnails.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
            }
        }
    }

    Some(StreamInfoItem {
        service_id: 0,
        url: format!("https://www.youtube.com/watch?v={video_id}"),
        name: title,
        thumbnails,
        uploader_name,
        uploader_url: String::new(),
        uploader_id: String::new(),
        uploader_verified: false,
        duration_seconds,
        view_count,
        upload_date_relative: upload_relative,
        stream_type: Some(crate::stream::StreamType::VideoStream),
        short_description: String::new(),
    })
}

/// Parses an abbreviated English view-count label into an absolute count.
///
/// Accepted shapes (case-insensitive, thousands separators allowed):
/// `"999 views"`, `"1 view"`, `"23K views"`, `"1.1M views"`, `"5.4B views"`
/// and `"No views"` (→ 0).
///
/// Returns -1 when the text cannot be interpreted as a non-negative,
/// finite number.
///
/// The scaled value is rounded to the nearest integer rather than
/// truncated: decimal fractions are not exact in binary floating point, so
/// e.g. `8.2 * 1_000_000.0` evaluates to `8199999.999999999` and truncation
/// would report 8_199_999 views.
fn parse_lockup_view_count(text: &str) -> i64 {
    // "1.1m views" / "23k views" / "5.4b views" / "999 views"
    let lowered = text.to_ascii_lowercase().replace(',', "");
    let lowered = lowered.replace(" views", "").replace(" view", "");
    let cleaned = lowered.trim();

    // YouTube renders zero-view videos as "No views".
    if cleaned == "no" {
        return 0;
    }

    let (num, mult) = if let Some(n) = cleaned.strip_suffix('k') {
        (n.trim(), 1_000.0)
    } else if let Some(n) = cleaned.strip_suffix('m') {
        (n.trim(), 1_000_000.0)
    } else if let Some(n) = cleaned.strip_suffix('b') {
        (n.trim(), 1_000_000_000.0)
    } else {
        (cleaned, 1.0)
    };

    match num.parse::<f64>() {
        // `f64::from_str` also accepts "inf"/"nan" and signed values; none
        // of those is a meaningful view count.
        Ok(n) if n.is_finite() && n >= 0.0 => (n * mult).round() as i64,
        _ => -1,
    }
}

/// Converts a clock-style duration (`"SS"`, `"MM:SS"`, `"H:MM:SS"`, …) to
/// seconds.
///
/// Fields are split on `:` and combined base-60 from left to right. A field
/// that is not a valid integer after trimming counts as 0, so the function
/// never fails.
fn parse_duration_clock(text: &str) -> i64 {
    text.split(':')
        .map(|field| field.trim().parse::<i64>().unwrap_or(0))
        .fold(0i64, |seconds, field| seconds * 60 + field)
}

fn parse_image_set(value: Option<&Value>) -> ImageSet {
    let mut out = Vec::new();
    if let Some(arr) = value
        .and_then(|v| v.get("thumbnails"))
        .and_then(|t| t.as_array())
    {
        for t in arr {
            if let Some(url) = t.get("url").and_then(|v| v.as_str()) {
                let h = t.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
                let w = t.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
                out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
            }
        }
    }
    out
}

/// Avatar extraction for the newer pageHeaderRenderer flavor.
///
/// Walks `header.content.pageHeaderViewModel.image` and finds the first
/// `sources[]` array it can — that lives under either
/// `decoratedAvatarViewModel.avatar.avatarViewModel.image.sources` or,
/// on some channels, the slightly shallower
/// `avatarViewModel.image.sources`. Returns ImageSet ordered by source
/// height ascending — matches what parse_image_set produces for the
/// legacy `thumbnails[]` path, so .last() still gives the largest one.
fn parse_page_header_avatar(header: &Value) -> ImageSet {
    let content = header
        .get("content")
        .and_then(|c| c.get("pageHeaderViewModel"));
    let Some(content) = content else { return Vec::new() };
    let image = content.get("image");
    let Some(image) = image else { return Vec::new() };

    // Try a couple of nestings — YT migrates the exact path occasionally
    // and we want to keep parsing through future shuffles.
    let candidates = [
        image
            .get("decoratedAvatarViewModel")
            .and_then(|d| d.get("avatar"))
            .and_then(|a| a.get("avatarViewModel"))
            .and_then(|a| a.get("image"))
            .and_then(|i| i.get("sources")),
        image
            .get("avatarViewModel")
            .and_then(|a| a.get("image"))
            .and_then(|i| i.get("sources")),
        image.get("sources"),
    ];

    for src in candidates.into_iter().flatten() {
        if let Some(arr) = src.as_array() {
            let mut out = Vec::with_capacity(arr.len());
            for s in arr {
                let Some(url) = s.get("url").and_then(|v| v.as_str()) else { continue };
                let w = s.get("width").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
                let h = s.get("height").and_then(|v| v.as_i64()).unwrap_or(-1) as i32;
                out.push(Image::new(url, h, w, ResolutionLevel::from_height(h)));
            }
            // Match parse_image_set ordering — caller uses .last() for
            // the largest size.
            out.sort_by_key(|i| i.height());
            if !out.is_empty() {
                return out;
            }
        }
    }
    Vec::new()
}

/// Parses an abbreviated subscriber label into an absolute count.
///
/// Accepted shapes: `"350 subscribers"`, `"1 subscriber"`,
/// `"1.2K subscribers"`, `"12.5M subscribers"`, `"2B subscribers"`.
/// Thousands separators are allowed; the `K`/`M`/`B` suffix must be upper
/// case.
///
/// Returns -1 when the text cannot be interpreted as a non-negative,
/// finite number.
///
/// The scaled value is rounded to the nearest integer rather than
/// truncated: decimal fractions are not exact in binary floating point, so
/// e.g. `8.2 * 1_000_000.0` evaluates to `8199999.999999999` and
/// `2.01 * 1_000.0` to `2009.9999999999998`; truncation would be off by one.
fn parse_subscriber_count(text: &str) -> i64 {
    // "12.5M subscribers" / "1.2K subscribers" / "350 subscribers"
    let stripped = text.replace("subscribers", "").replace("subscriber", "");
    let stripped = stripped.trim();

    let (num, mult) = if let Some(n) = stripped.strip_suffix('K') {
        (n.trim(), 1_000.0)
    } else if let Some(n) = stripped.strip_suffix('M') {
        (n.trim(), 1_000_000.0)
    } else if let Some(n) = stripped.strip_suffix('B') {
        (n.trim(), 1_000_000_000.0)
    } else {
        (stripped, 1.0)
    };

    match num.replace(',', "").parse::<f64>() {
        // `f64::from_str` also accepts "inf"/"nan" and signed values; none
        // of those is a meaningful subscriber count.
        Ok(n) if n.is_finite() && n >= 0.0 => (n * mult).round() as i64,
        _ => -1,
    }
}

// Unit tests for the pure parsing helpers. Nothing here touches the
// network: every test feeds literal strings or inline JSON to the parsers.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // Plain numbers, each of the K/M/B suffixes, and the singular
    // "subscriber" wording.
    #[test]
    fn subscriber_count_parsing() {
        assert_eq!(parse_subscriber_count("350 subscribers"), 350);
        assert_eq!(parse_subscriber_count("1.2K subscribers"), 1_200);
        assert_eq!(parse_subscriber_count("12.5M subscribers"), 12_500_000);
        assert_eq!(parse_subscriber_count("2B subscribers"), 2_000_000_000);
        assert_eq!(parse_subscriber_count("1 subscriber"), 1);
    }

    // Classic header flavor: title, subscriber text and a verified badge
    // come from the header, the description from channelMetadataRenderer.
    // The badge style only has to start with BADGE_STYLE_TYPE_VERIFIED.
    #[test]
    fn parses_c4_tabbed_header() {
        let body = json!({
            "header":{"c4TabbedHeaderRenderer":{
                "title":"NoCopyrightSounds",
                "subscriberCountText":{"simpleText":"42.5M subscribers"},
                "badges":[{"metadataBadgeRenderer":{"style":"BADGE_STYLE_TYPE_VERIFIED_ARTIST"}}]
            }},
            "metadata":{"channelMetadataRenderer":{"description":"Royalty-free music"}}
        });
        let info = parse_channel_browse("UC_aEa8K-EOJ3D6gOs7HcyNg", &body);
        assert_eq!(info.name, "NoCopyrightSounds");
        assert_eq!(info.description, "Royalty-free music");
        assert_eq!(info.subscriber_count, 42_500_000);
        assert!(info.verified);
        assert_eq!(info.channel_id, "UC_aEa8K-EOJ3D6gOs7HcyNg");
    }

    // Newer header flavor: with no c4TabbedHeaderRenderer present the name
    // is taken from pageHeaderRenderer.pageTitle.
    #[test]
    fn parses_page_header_renderer_fallback() {
        let body = json!({
            "header":{"pageHeaderRenderer":{"pageTitle":"@SomeChannel"}}
        });
        let info = parse_channel_browse("UCxxx", &body);
        assert_eq!(info.name, "@SomeChannel");
    }
}