diff --git a/sidecar/crates/torttube-sidecar/Cargo.toml b/sidecar/crates/torttube-sidecar/Cargo.toml
index c363fff..2e67476 100644
--- a/sidecar/crates/torttube-sidecar/Cargo.toml
+++ b/sidecar/crates/torttube-sidecar/Cargo.toml
@@ -11,4 +11,23 @@ name = "torttube-sidecar"
 path = "src/main.rs"
 
 [dependencies]
-# M1 — rustypipe (codeberg.org/ThetaDev/rustypipe), tokio, serde, serde_json, reqwest
+# Tier 1 — native Rust Innertube
+rustypipe = "0.11"
+
+# Tier 2 + 3 — yt-dlp subprocess shell-out (no library, just std::process)
+
+# Runtime + JSON-over-stdio
+tokio = { version = "1", features = ["rt-multi-thread", "macros", "io-std", "io-util", "process", "fs"] }
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+
+# Errors + logging
+anyhow = "1"
+thiserror = "1"
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+
+# SponsorBlock — sha256 + REST
+sha2 = "0.10"
+hex = "0.4"
+reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
diff --git a/sidecar/crates/torttube-sidecar/src/main.rs b/sidecar/crates/torttube-sidecar/src/main.rs
index 543e3a8..49a53d4 100644
--- a/sidecar/crates/torttube-sidecar/src/main.rs
+++ b/sidecar/crates/torttube-sidecar/src/main.rs
@@ -1,9 +1,199 @@
 // torttube-sidecar — JSON-over-stdio bridge between Kodi (Python) and YouTube extraction.
 // SPDX-License-Identifier: GPL-3.0-or-later
 //
-// Reads one JSON request per line from stdin, writes one JSON response per line to stdout.
-// M0 scaffold — handlers land in M1+ (resolve, sponsorblock, search, channel, playlist).
+// Protocol: one JSON request per line on stdin, one JSON response per line on stdout.
+// Logs go to stderr (so Kodi can capture them separately from the JSON stream).
+//
+// Ops:
+//   {"op":"resolve","id":"<video_id>"}                  Tier 1 (rustypipe) → Tier 2 (yt-dlp) fallback
+//   {"op":"rip","id":"<video_id>","dest_dir":"<dir>"}   Tier 3 (yt-dlp download)
+//   {"op":"sponsorblock","id":"<video_id>","categories":["sponsor","selfpromo"]}
+//   {"op":"ping"}                                       liveness check
+//
+// Responses always carry `ok: bool`. On `ok:false`, `error` is a human-readable message and
+// `kind` is the typed reason.
 
-fn main() {
-    eprintln!("torttube-sidecar M0 scaffold — see MILESTONES.md");
+use anyhow::Context;
+use serde::{Deserialize, Serialize};
+use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
+
+mod resolve;
+mod rip;
+mod sponsor;
+
+/// One request line, discriminated by its `op` field (snake_case variant name).
+#[derive(Debug, Deserialize)]
+#[serde(tag = "op", rename_all = "snake_case")]
+enum Request {
+    Ping,
+    Resolve { id: String },
+    Rip { id: String, dest_dir: String },
+    Sponsorblock {
+        id: String,
+        /// Falls back to `sponsor::default_categories()` when the field is absent.
+        #[serde(default = "sponsor::default_categories")]
+        categories: Vec<String>,
+    },
+}
+
+/// One response line. Serialized untagged: `Ok` is emitted as the bare JSON value it wraps
+/// (see [`Response::ok`], which guarantees an `ok: true` field), `Err` as an object with
+/// `ok`, `error` and `kind`.
+#[derive(Debug, Serialize)]
+#[serde(untagged)]
+enum Response {
+    Ok(serde_json::Value),
+    Err { ok: bool, error: String, kind: ErrorKind },
+}
+
+/// Typed failure reason carried in the `kind` field of an error response (snake_case on the wire).
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "snake_case")]
+enum ErrorKind {
+    BadRequest,
+    AgeRestricted,
+    RegionBlocked,
+    PrivateVideo,
+    NotFound,
+    Extractor,
+    Network,
+    Io,
+    Internal,
+}
+
+impl Response {
+    /// Builds a success response from `value`.
+    ///
+    /// Objects get an `"ok": true` entry inserted (overwriting any existing `ok` key); any
+    /// other JSON value is wrapped as `{"ok": true, "value": <value>}`.
+    fn ok(value: serde_json::Value) -> Self {
+        let value = match value {
+            serde_json::Value::Object(mut map) => {
+                map.insert("ok".into(), serde_json::Value::Bool(true));
+                serde_json::Value::Object(map)
+            }
+            other => serde_json::json!({ "ok": true, "value": other }),
+        };
+        Response::Ok(value)
+    }
+
+    /// Builds a failure response (`ok: false`) with the given typed `kind` and message.
+    fn err(kind: ErrorKind, error: impl Into<String>) -> Self {
+        Response::Err { ok: false, kind, error: error.into() }
+    }
+}
+
+/// Entry point: serves requests from stdin until it closes.
+///
+/// Logging goes to stderr; the filter is read from the `TORTTUBE_LOG` environment variable and
+/// defaults to `info`. Each non-blank input line yields exactly one JSON line on stdout,
+/// flushed immediately. An I/O error on stdin or stdout terminates the process with an error.
+#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
+async fn main() -> anyhow::Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(
+            tracing_subscriber::EnvFilter::try_from_env("TORTTUBE_LOG")
+                .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
+        )
+        .with_writer(std::io::stderr)
+        .init();
+
+    tracing::info!("torttube-sidecar starting (pid={})", std::process::id());
+
+    let stdin = tokio::io::stdin();
+    let mut stdout = tokio::io::stdout();
+    let mut reader = BufReader::new(stdin).lines();
+
+    while let Some(line) = reader.next_line().await? {
+        let line = line.trim();
+        if line.is_empty() {
+            continue;
+        }
+        let response = handle_line(line).await;
+        // Build the fallback through serde_json so the error text is escaped properly; a
+        // hand-formatted string would emit invalid JSON if the message contained quotes.
+        let json = serde_json::to_string(&response).unwrap_or_else(|e| {
+            serde_json::json!({
+                "ok": false,
+                "error": format!("serialize: {e}"),
+                "kind": "internal",
+            })
+            .to_string()
+        });
+        stdout.write_all(json.as_bytes()).await?;
+        stdout.write_all(b"\n").await?;
+        stdout.flush().await?;
+    }
+
+    tracing::info!("torttube-sidecar stdin closed, exiting cleanly");
+    Ok(())
+}
+
+/// Parses one request line and dispatches it to the matching handler.
+///
+/// Never fails: parse errors become a `bad_request` response, handler errors are mapped to
+/// their typed response, and every SponsorBlock failure is reported as `network`.
+async fn handle_line(line: &str) -> Response {
+    let req: Request = match serde_json::from_str(line) {
+        Ok(r) => r,
+        Err(e) => {
+            tracing::warn!(error = %e, "bad request");
+            return Response::err(ErrorKind::BadRequest, format!("parse: {e}"));
+        }
+    };
+
+    match req {
+        Request::Ping => Response::ok(serde_json::json!({ "pong": true })),
+        Request::Resolve { id } => match resolve::resolve(&id).await {
+            Ok(v) => Response::ok(v),
+            Err(e) => e.into(),
+        },
+        Request::Rip { id, dest_dir } => match rip::rip(&id, &dest_dir).await {
+            Ok(v) => Response::ok(v),
+            Err(e) => e.into(),
+        },
+        Request::Sponsorblock { id, categories } => match sponsor::fetch(&id, &categories).await {
+            Ok(v) => Response::ok(v),
+            // `{e:#}` prints the whole anyhow cause chain, not just the outermost message.
+            Err(e) => Response::err(ErrorKind::Network, format!("sponsorblock: {e:#}")),
+        },
+    }
+}
+
+/// Common error returned by resolve/rip handlers — gets mapped to a typed Response.
+#[derive(Debug, thiserror::Error)]
+pub enum HandlerError {
+    #[error("age-restricted")]
+    AgeRestricted,
+    #[error("region-blocked")]
+    RegionBlocked,
+    #[error("private video")]
+    PrivateVideo,
+    #[error("not found")]
+    NotFound,
+    #[error("extractor: {0}")]
+    Extractor(String),
+    #[error("network: {0}")]
+    Network(String),
+    #[error("io: {0}")]
+    Io(String),
+    #[error("internal: {0}")]
+    Internal(String),
+}
+
+/// Maps each handler error variant to the `ErrorKind` of the same name; the response's
+/// `error` text is the error's `Display` output.
+impl From<HandlerError> for Response {
+    fn from(e: HandlerError) -> Self {
+        let kind = match &e {
+            HandlerError::AgeRestricted => ErrorKind::AgeRestricted,
+            HandlerError::RegionBlocked => ErrorKind::RegionBlocked,
+            HandlerError::PrivateVideo => ErrorKind::PrivateVideo,
+            HandlerError::NotFound => ErrorKind::NotFound,
+            HandlerError::Extractor(_) => ErrorKind::Extractor,
+            HandlerError::Network(_) => ErrorKind::Network,
+            HandlerError::Io(_) => ErrorKind::Io,
+            HandlerError::Internal(_) => ErrorKind::Internal,
+        };
+        Response::err(kind, e.to_string())
+    }
+}
+
+impl From<std::io::Error> for HandlerError {
+    fn from(e: std::io::Error) -> Self {
+        HandlerError::Io(e.to_string())
+    }
+}
+
+impl From<anyhow::Error> for HandlerError {
+    fn from(e: anyhow::Error) -> Self {
+        // Alternate formatting keeps the context chain ("outer: cause: root cause");
+        // plain `to_string()` would report only the outermost context.
+        HandlerError::Internal(format!("{e:#}"))
+    }
+}
+
+/// Small helper for shelling out to yt-dlp. Returns stdout on success, anyhow on failure.
+///
+/// `args` are handed to the `yt-dlp` executable as-is (no shell involved). A non-zero exit
+/// status becomes an error that includes the arguments, the exit status and the trimmed
+/// stderr text.
+pub(crate) async fn run_yt_dlp(args: &[&str]) -> anyhow::Result<Vec<u8>> {
+    let output = tokio::process::Command::new("yt-dlp")
+        .args(args)
+        .output()
+        .await
+        .with_context(|| format!("spawning yt-dlp {args:?}"))?;
+    if !output.status.success() {
+        anyhow::bail!(
+            "yt-dlp {:?} exited {}: {}",
+            args,
+            output.status,
+            String::from_utf8_lossy(&output.stderr).trim()
+        );
+    }
+    Ok(output.stdout)
 }
diff --git a/sidecar/crates/torttube-sidecar/src/resolve.rs b/sidecar/crates/torttube-sidecar/src/resolve.rs
new file mode 100644
index 0000000..1aad439
--- /dev/null
+++ b/sidecar/crates/torttube-sidecar/src/resolve.rs
@@ -0,0 +1,146 @@
+// resolve.rs — Tier 1 (rustypipe) → Tier 2 (yt-dlp -j fallback)
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+use serde_json::Value;
+
+use crate::{run_yt_dlp, HandlerError};
+
+/// Top-level resolve. Tries Tier 1 (rustypipe), falls back to Tier 2 (yt-dlp -j).
+///
+/// Tier 1 failures classified as age-restricted, private or not-found are returned directly
+/// without trying yt-dlp; every other Tier 1 failure triggers the Tier 2 attempt, whose
+/// result (success or error) is returned.
+pub(crate) async fn resolve(id: &str) -> Result<Value, HandlerError> {
+    let tier1_err = match tier1_rustypipe(id).await {
+        Ok(v) => {
+            tracing::info!(id, source = "rustypipe", "resolve ok");
+            return Ok(v);
+        }
+        Err(e) => e,
+    };
+
+    // Typed errors that mean "video can't be played by anyone" — don't retry yt-dlp,
+    // it'll just hit the same wall. Logged separately so the log never claims a fallback
+    // that does not happen.
+    if matches!(
+        tier1_err,
+        HandlerError::AgeRestricted | HandlerError::PrivateVideo | HandlerError::NotFound
+    ) {
+        tracing::warn!(id, error = %tier1_err, "rustypipe failed with a terminal error; not trying yt-dlp");
+        return Err(tier1_err);
+    }
+
+    tracing::warn!(id, error = %tier1_err, "rustypipe failed; falling back to yt-dlp");
+    tier2_yt_dlp(id).await
+}
+
+/// Tier 1 — native rustypipe. Serializes the whole player.details + selected streams as
+/// opaque pass-through JSON. The Python addon parses the fields it needs; this keeps us
+/// resilient to rustypipe shape evolution and unblocks tier-2 normalization later.
+async fn tier1_rustypipe(id: &str) -> Result<Value, HandlerError> {
+    use rustypipe::client::RustyPipe;
+    use rustypipe::param::StreamFilter;
+
+    let rp = RustyPipe::new();
+    let player = rp
+        .query()
+        .player(id)
+        .await
+        .map_err(|e| classify_rustypipe_error(&e))?;
+
+    let (video, audio) = player.select_video_audio_stream(&StreamFilter::default());
+
+    let details_json = serde_json::to_value(&player.details)
+        .map_err(|e| HandlerError::Internal(format!("serialize details: {e}")))?;
+    // A missing stream is reported as JSON null rather than as an error.
+    let video_json = video
+        .map(serde_json::to_value)
+        .transpose()
+        .map_err(|e| HandlerError::Internal(format!("serialize video: {e}")))?
+        .unwrap_or(Value::Null);
+    let audio_json = audio
+        .map(serde_json::to_value)
+        .transpose()
+        .map_err(|e| HandlerError::Internal(format!("serialize audio: {e}")))?
+        .unwrap_or(Value::Null);
+
+    Ok(serde_json::json!({
+        "source": "rustypipe",
+        "details": details_json,
+        "video_stream": video_json,
+        "audio_stream": audio_json,
+    }))
+}
+
+/// Classify a rustypipe error into one of our typed handler errors.
+/// rustypipe's error enum varies by version; we match on the Display string for resilience.
+///
+/// Matching is case-insensitive substring search, checked in this order: age restriction,
+/// region block, private, not found, network; anything else is `Extractor`. The message
+/// carried by `Network`/`Extractor` keeps its original casing so that case-sensitive details
+/// (video IDs, URLs) stay intact.
+fn classify_rustypipe_error(e: &dyn std::fmt::Display) -> HandlerError {
+    let msg = e.to_string();
+    let lower = msg.to_lowercase();
+    if lower.contains("age") && lower.contains("restrict") {
+        HandlerError::AgeRestricted
+    } else if lower.contains("region") || lower.contains("country") || lower.contains("geo") {
+        HandlerError::RegionBlocked
+    } else if lower.contains("private") {
+        HandlerError::PrivateVideo
+    } else if lower.contains("not found") || lower.contains("unavailable") {
+        HandlerError::NotFound
+    } else if lower.contains("network") || lower.contains("timeout") || lower.contains("connect") {
+        HandlerError::Network(msg)
+    } else {
+        HandlerError::Extractor(msg)
+    }
+}
+
+/// Tier 2 — shell out to yt-dlp -j.
+///
+/// Runs `yt-dlp -j --no-warnings --no-playlist <watch url>`, parses the JSON dump and reshapes
+/// it into `{source, title, duration_s, channel_name, channel_id, streams}`. Formats without
+/// a string `url` are dropped from `streams`.
+async fn tier2_yt_dlp(id: &str) -> Result<Value, HandlerError> {
+    let url = format!("https://www.youtube.com/watch?v={id}");
+    let stdout = run_yt_dlp(&["-j", "--no-warnings", "--no-playlist", &url])
+        .await
+        // `{e:#}` keeps the anyhow cause chain (e.g. why spawning failed).
+        .map_err(|e| classify_yt_dlp_error(&format!("{e:#}")))?;
+
+    let dump: Value = serde_json::from_slice(&stdout)
+        .map_err(|e| HandlerError::Extractor(format!("yt-dlp json parse: {e}")))?;
+
+    // yt-dlp's JSON has a `formats` array. We pass it through largely as-is — the addon
+    // can pick what inputstream.adaptive wants. Shape it to match our protocol.
+    let streams: Vec<Value> = dump
+        .get("formats")
+        .and_then(Value::as_array)
+        .map(Vec::as_slice)
+        .unwrap_or_default()
+        .iter()
+        .filter_map(|f| {
+            let url = f.get("url")?.as_str()?;
+            let vcodec = f.get("vcodec").and_then(Value::as_str).unwrap_or("none");
+            let acodec = f.get("acodec").and_then(Value::as_str).unwrap_or("none");
+            let is_audio_only = vcodec == "none" && acodec != "none";
+            let is_video_only = vcodec != "none" && acodec == "none";
+            Some(serde_json::json!({
+                "url": url,
+                // Non-numeric format ids serialize as null.
+                "itag": f.get("format_id").and_then(Value::as_str).and_then(|s| s.parse::<u32>().ok()),
+                "mime": f.get("ext"),
+                "width": f.get("width"),
+                "height": f.get("height"),
+                "bitrate": f.get("tbr"),
+                "is_audio_only": is_audio_only,
+                "is_video_only": is_video_only,
+            }))
+        })
+        .collect();
+
+    Ok(serde_json::json!({
+        "source": "yt-dlp",
+        "title": dump.get("title"),
+        "duration_s": dump.get("duration"),
+        "channel_name": dump.get("channel"),
+        "channel_id": dump.get("channel_id"),
+        "streams": streams,
+    }))
+}
+
+/// Maps a yt-dlp failure message to a typed handler error by case-insensitive substring match.
+///
+/// The age check looks for age-specific phrases rather than the bare substring "age", which
+/// also occurs in "webpage", "page" and "message" and would misreport ordinary download
+/// failures as age restrictions. The `Extractor` payload keeps the original casing.
+fn classify_yt_dlp_error(msg: &str) -> HandlerError {
+    let lower = msg.to_lowercase();
+    if lower.contains("confirm your age")
+        || lower.contains("age-restrict")
+        || lower.contains("age restrict")
+    {
+        HandlerError::AgeRestricted
+    } else if lower.contains("private") {
+        HandlerError::PrivateVideo
+    } else if lower.contains("not available") || lower.contains("does not exist") {
+        HandlerError::NotFound
+    } else if lower.contains("geo") || lower.contains("region") {
+        HandlerError::RegionBlocked
+    } else {
+        HandlerError::Extractor(msg.to_owned())
+    }
+}
diff --git a/sidecar/crates/torttube-sidecar/src/rip.rs b/sidecar/crates/torttube-sidecar/src/rip.rs
new file mode 100644
index 0000000..0e2c29f
--- /dev/null
+++ b/sidecar/crates/torttube-sidecar/src/rip.rs
@@ -0,0 +1,61 @@
+// rip.rs — Tier 3: yt-dlp downloads the video to a local file, addon plays from disk.
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// Last resort when stream URLs die mid-play (poToken expiry, cookie session mismatch,
+// HLS chunk 403s). Kodi can play partially-downloaded files, but for simplicity v0.1
+// waits for the full download to finish.
+
+use crate::{run_yt_dlp, HandlerError};
+use serde_json::Value;
+use std::path::Path;
+
+/// Downloads video `id` into `dest_dir` with yt-dlp and reports where it landed.
+///
+/// Creates `dest_dir` (and parents) if needed. On success returns
+/// `{source: "yt-dlp-rip", path, size_bytes}`, where `path` is what yt-dlp printed for
+/// `after_move:filepath` and `size_bytes` comes from stat-ing that file.
+///
+/// Errors: `Io` if the directory cannot be created, is not a directory, or the resulting file
+/// cannot be stat-ed; `Extractor` if yt-dlp fails or prints no path.
+pub(crate) async fn rip(id: &str, dest_dir: &str) -> Result<Value, HandlerError> {
+    // Ensure dest dir exists.
+    tokio::fs::create_dir_all(dest_dir).await?;
+
+    let dest_dir = Path::new(dest_dir);
+    if !dest_dir.is_dir() {
+        return Err(HandlerError::Io(format!(
+            "dest_dir not a directory: {}",
+            dest_dir.display()
+        )));
+    }
+
+    // Use yt-dlp's output template so the resulting filename embeds the id we can grep for.
+    // -f bestvideo+bestaudio/best lets yt-dlp pick a mergeable pair when possible.
+    let url = format!("https://www.youtube.com/watch?v={id}");
+    let output_template = dest_dir.join("%(id)s.%(ext)s");
+    let output_template_str = output_template.to_string_lossy().into_owned();
+
+    let stdout = run_yt_dlp(&[
+        "-o",
+        &output_template_str,
+        "-f",
+        "bestvideo+bestaudio/best",
+        "--no-playlist",
+        "--print",
+        "after_move:filepath",
+        &url,
+    ])
+    .await
+    // `{e:#}` keeps the anyhow cause chain (e.g. why spawning failed).
+    .map_err(|e| HandlerError::Extractor(format!("yt-dlp rip: {e:#}")))?;
+
+    // Take the last non-empty line: should anything else ever be printed ahead of the path,
+    // it must not end up embedded in the path we stat.
+    let printed = String::from_utf8_lossy(&stdout);
+    let file_path = printed
+        .lines()
+        .map(str::trim)
+        .rfind(|line| !line.is_empty())
+        .unwrap_or_default()
+        .to_string();
+    if file_path.is_empty() {
+        return Err(HandlerError::Extractor(
+            "yt-dlp rip: no filepath in output".to_string(),
+        ));
+    }
+
+    let meta = tokio::fs::metadata(&file_path).await.map_err(|e| {
+        HandlerError::Io(format!("stat ripped file {file_path}: {e}"))
+    })?;
+
+    tracing::info!(id, path = %file_path, size = meta.len(), "rip ok");
+
+    Ok(serde_json::json!({
+        "source": "yt-dlp-rip",
+        "path": file_path,
+        "size_bytes": meta.len(),
+    }))
+}
diff --git a/sidecar/crates/torttube-sidecar/src/sponsor.rs b/sidecar/crates/torttube-sidecar/src/sponsor.rs
new file mode 100644
index 0000000..3e828df
--- /dev/null
+++ b/sidecar/crates/torttube-sidecar/src/sponsor.rs
@@ -0,0 +1,115 @@
+// sponsor.rs — SponsorBlock client.
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// Privacy-preserving lookup: SHA-256 the video ID, send the first 4 hex chars as a prefix,
+// SponsorBlock returns all segments for videos sharing that prefix; we filter locally.
+// Spec: https://wiki.sponsor.ajay.app/w/API_Docs#GET_/api/skipSegments/:sha256HashPrefix
+
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+
+const SPONSORBLOCK_API: &str = "https://sponsor.ajay.app/api/skipSegments";
+
+/// Categories requested when the caller does not specify any.
+pub(crate) fn default_categories() -> Vec<String> {
+    vec!["sponsor".into(), "selfpromo".into(), "interaction".into()]
+}
+
+/// One entry of the API's top-level array: the segments of a single video that shares the
+/// requested hash prefix.
+#[derive(Debug, Deserialize)]
+struct ApiResponse {
+    #[serde(rename = "videoID")]
+    video_id: String,
+    segments: Vec<ApiSegment>,
+}
+
+/// A single segment. Field renames apply to both directions, so segments are passed on to
+/// the caller under the API's own key names (`UUID`, `actionType`, ...).
+#[derive(Debug, Deserialize, Serialize)]
+struct ApiSegment {
+    #[serde(rename = "UUID")]
+    uuid: String,
+    segment: [f64; 2],
+    category: String,
+    #[serde(rename = "actionType")]
+    action_type: String,
+    #[serde(rename = "videoDuration", default)]
+    video_duration: f64,
+    #[serde(rename = "userID", default)]
+    user_id: String,
+    #[serde(default)]
+    votes: i32,
+    #[serde(default)]
+    locked: u8,
+    #[serde(default)]
+    description: String,
+}
+
+/// Fetches the skip segments for video `id`, restricted to `categories`.
+///
+/// Only a 4-character SHA-256 prefix of the id is sent; the response is filtered locally to
+/// the exact id. Returns `{"video_id": id, "segments": [...]}`; an HTTP 404 yields the same
+/// shape with an empty `segments` list.
+///
+/// Errors on request/transport failure (10 s timeout), any other non-success status, or a
+/// body that does not decode.
+pub(crate) async fn fetch(id: &str, categories: &[String]) -> anyhow::Result<serde_json::Value> {
+    let prefix = hash_prefix(id, 4);
+    let categories_json = serde_json::to_string(categories)?;
+    let url = format!(
+        "{SPONSORBLOCK_API}/{prefix}?categories={enc}",
+        enc = urlencode(&categories_json)
+    );
+
+    let client = reqwest::Client::builder()
+        .user_agent(concat!("torttube-sidecar/", env!("CARGO_PKG_VERSION")))
+        .timeout(std::time::Duration::from_secs(10))
+        .build()?;
+
+    let resp = client.get(&url).send().await?;
+    if resp.status() == reqwest::StatusCode::NOT_FOUND {
+        // No segments matching the prefix at all. Same shape as the success case so callers
+        // can always read `video_id`.
+        return Ok(serde_json::json!({ "video_id": id, "segments": [] }));
+    }
+    if !resp.status().is_success() {
+        anyhow::bail!("sponsorblock http {}", resp.status());
+    }
+
+    let body: Vec<ApiResponse> = resp.json().await?;
+
+    // Filter to the exact video id (the API returns all videos sharing the prefix).
+    let segments: Vec<&ApiSegment> = body
+        .iter()
+        .filter(|r| r.video_id == id)
+        .flat_map(|r| r.segments.iter())
+        .collect();
+
+    Ok(serde_json::json!({
+        "video_id": id,
+        "segments": segments,
+    }))
+}
+
+/// Returns the first `chars` characters of the lowercase hex SHA-256 digest of `id`
+/// (the whole 64-character digest if `chars` exceeds its length).
+fn hash_prefix(id: &str, chars: usize) -> String {
+    let mut hasher = Sha256::new();
+    hasher.update(id.as_bytes());
+    let digest = hasher.finalize();
+    let hex_str = hex::encode(digest);
+    hex_str.chars().take(chars).collect()
+}
+
+/// Tiny URL-encoder for query parameter values. Passes ASCII letters, digits and `-_.~`
+/// through unchanged and percent-encodes every other byte as uppercase `%XX`.
+fn urlencode(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    for b in s.bytes() {
+        match b {
+            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
+                out.push(b as char);
+            }
+            _ => out.push_str(&format!("%{:02X}", b)),
+        }
+    }
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn hash_prefix_has_requested_shape() {
+        // The expected digest is not pinned here; verify length, alphabet and determinism.
+        let p = hash_prefix("dQw4w9WgXcQ", 4);
+        assert_eq!(p.len(), 4);
+        assert!(p.chars().all(|c| c.is_ascii_hexdigit()));
+        assert_eq!(p, hash_prefix("dQw4w9WgXcQ", 4));
+    }
+
+    #[test]
+    fn urlencode_escapes_json_punctuation() {
+        assert_eq!(urlencode(r#"["sponsor"]"#), "%5B%22sponsor%22%5D");
+        assert_eq!(urlencode("a-b_c.d~e"), "a-b_c.d~e");
+        assert_eq!(urlencode(""), "");
+    }
+
+    #[test]
+    fn default_categories_include_sponsor() {
+        assert!(default_categories().iter().any(|c| c == "sponsor"));
+    }
+}