#![doc = include_str!("../README.md")] #![cfg_attr(docsrs, feature(doc_cfg))] #![warn(missing_docs, clippy::todo, clippy::dbg_macro)] mod error; mod util; use std::{ borrow::Cow, cmp::Ordering, ffi::OsString, ops::Range, path::{Path, PathBuf}, sync::Arc, time::Duration, }; use futures_util::stream::{self, StreamExt, TryStreamExt}; use once_cell::sync::Lazy; use rand::Rng; use regex::Regex; use reqwest::{header, Client, StatusCode, Url}; use rustypipe::{ client::{ClientType, RustyPipe}, model::{ traits::{FileFormat, YtEntity}, AudioCodec, TrackItem, VideoCodec, VideoPlayer, }, param::StreamFilter, }; use tokio::{ fs::{self, File}, io::AsyncWriteExt, process::Command, }; #[cfg(feature = "indicatif")] use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; #[cfg(feature = "audiotag")] use lofty::{config::WriteOptions, picture::Picture, prelude::*, tag::Tag}; #[cfg(feature = "audiotag")] use rustypipe::model::{richtext::ToPlaintext, VideoDetails, VideoPlayerDetails}; #[cfg(feature = "audiotag")] use time::{Date, OffsetDateTime}; pub use error::DownloadError; type Result = core::result::Result; const CHUNK_SIZE_MIN: u64 = 9_000_000; const CHUNK_SIZE_MAX: u64 = 10_000_000; /// RustyPipe audio/video downloader /// /// The downloader uses an [`Arc`] internally, so if you are using the client /// at multiple locations, you can just clone it. 
#[derive(Clone)] pub struct Downloader { i: Arc, } /// Builder to construct a new downloader pub struct DownloaderBuilder { rp: Option, ffmpeg: String, #[cfg(feature = "indicatif")] multi: Option, #[cfg(feature = "indicatif")] progress_style: Option, filter: StreamFilter, video_format: DownloadVideoFormat, n_retries: u32, path_precheck: bool, #[cfg(feature = "audiotag")] audio_tag: bool, #[cfg(feature = "audiotag")] crop_cover: bool, client_types: Option>, } struct DownloaderInner { /// YT client rp: RustyPipe, /// HTTP client http: Client, /// Path to the ffmpeg binary ffmpeg: String, /// Global progress #[cfg(feature = "indicatif")] multi: Option, /// Progress style #[cfg(feature = "indicatif")] progress_style: ProgressStyle, /// Default stream filter filter: StreamFilter, /// Default video format video_format: DownloadVideoFormat, /// Number of retries in case of 403 error n_retries: u32, /// Check if destination path exists before player is fetched path_precheck: bool, /// Apply metadata to audio files #[cfg(feature = "audiotag")] audio_tag: bool, /// Crop YT thumbnails to ensure square album covers #[cfg(feature = "audiotag")] crop_cover: bool, /// Client types for fetching videos client_types: Option>, } /// Download query pub struct DownloadQuery { /// RustyPipe Downloader dl: Downloader, /// Video to download video: DownloadVideo, /// Destination dest: DownloadDest, /// Progress bar #[cfg(feature = "indicatif")] progress: Option, /// Stream filter filter: Option, /// Target video format video_format: Option, /// Client types for fetching videos client_types: Option>, } /// Video to be downloaded #[derive(Default)] pub struct DownloadVideo { id: String, name: Option, channel_id: Option, channel_name: Option, album_id: Option, album_name: Option, track_nr: Option, } impl DownloadVideo { /// Get the YouTube video id pub fn id(&self) -> &str { &self.id } /// Create a new DownloadVideo from a YouTube entity pub fn from_entity(video: &impl YtEntity) -> Self { 
DownloadVideo { id: video.id().to_owned(), name: Some(video.name().to_owned()), channel_id: video.channel_id().map(str::to_owned), channel_name: video .channel_name() .map(|n| n.strip_suffix("- Topic").unwrap_or(n).trim().to_owned()), album_id: None, album_name: None, track_nr: None, } } /// Create a new DownloadVideo from a YTM track pub fn from_track(track: &TrackItem) -> Self { DownloadVideo { id: track.id.to_owned(), name: Some(track.name.to_owned()), channel_id: track.channel_id().map(str::to_owned), channel_name: track.channel_name().map(str::to_owned), album_id: track.album.as_ref().map(|b| b.id.to_owned()), album_name: track.album.as_ref().map(|b| b.name.to_owned()), track_nr: track.track_nr, } } } #[derive(Clone)] enum DownloadDest { Default, File(PathBuf), Dir(PathBuf), Template(PathBuf), } fn video_filename(v: &DownloadVideo) -> String { let mut n = format!("{} [{}]", v.name.as_deref().unwrap_or_default(), v.id); if let Some(track_nr) = v.track_nr { n = format!("{track_nr:02} {n}"); } filenamify_lim(&n) } /// Video container format for downloading #[derive(Default, Debug, Copy, Clone, PartialEq, Eq)] pub enum DownloadVideoFormat { /// .mp4 #[default] Mp4, /// .mkv Mkv, /// .webm Webm, } impl DownloadVideoFormat { /// Get the video format file extension pub fn extension(&self) -> &'static str { match self { DownloadVideoFormat::Mp4 => "mp4", DownloadVideoFormat::Mkv => "mkv", DownloadVideoFormat::Webm => "webm", } } /// Get the video format from the given file extension pub fn from_extension(ext: &str) -> Option { match ext { "mp4" => Some(Self::Mp4), "mkv" => Some(Self::Mkv), "webm" => Some(Self::Webm), _ => None, } } } impl DownloadDest { fn get_dest_path(&self, v: &DownloadVideo) -> PathBuf { static RE_TEMPLATE: Lazy = Lazy::new(|| Regex::new(r#"\{\w+\} *"#).unwrap()); match self { DownloadDest::Default => PathBuf::from(video_filename(v)), DownloadDest::File(p) => p.clone(), DownloadDest::Dir(p) => p.join(video_filename(v)), DownloadDest::Template(t) 
=> t .iter() .map(|part| { let s = part.to_string_lossy(); let (mut replaced, last_end) = RE_TEMPLATE.find_iter(&s).fold( (String::new(), 0), |(mut acc, last_end), m| { acc += &s[last_end..m.start()]; let ms = m.as_str(); let trimmed = ms.trim_end_matches(' '); let repl: Option> = match trimmed.trim_matches(['{', '}']) { "id" => Some(v.id.as_str().into()), "title" => v.name.as_deref().map(Cow::from), "channel" => v.channel_name.as_deref().map(Cow::from), "channelId" => v.channel_id.as_deref().map(Cow::from), "album" => v.album_name.as_deref().map(Cow::from), "albumId" => v.album_id.as_deref().map(Cow::from), "track" => v.track_nr.map(|n| format!("{n:02}").into()), _ => None, }; if let Some(repl) = repl { acc += &repl; acc += &ms[trimmed.len()..]; // preceeding whitespace } (acc, m.end()) }, ); replaced += &s[last_end..]; replaced = replaced.trim().to_owned(); if replaced.is_empty() { "-".to_owned() } else { filenamify_lim(&replaced) } }) .collect(), } } } impl Default for DownloaderBuilder { fn default() -> Self { Self { rp: None, ffmpeg: "ffmpeg".to_owned(), #[cfg(feature = "indicatif")] multi: None, #[cfg(feature = "indicatif")] progress_style: None, filter: StreamFilter::new(), video_format: DownloadVideoFormat::Mp4, n_retries: 3, path_precheck: false, #[cfg(feature = "audiotag")] audio_tag: false, #[cfg(feature = "audiotag")] crop_cover: false, client_types: None, } } } impl DownloaderBuilder { /// Create a new [`DownloaderBuilder`] /// /// This is the same as [`Downloader::builder`] pub fn new() -> Self { Self::default() } /// Use a custom [`RustyPipe`] client #[must_use] pub fn rustypipe(mut self, rp: &RustyPipe) -> Self { self.rp = Some(rp.clone()); self } /// Set the path to ffmpeg, used to join video and audio files /// /// The default system-wide `ffmpeg` binary is used by default. 
#[must_use] pub fn ffmpeg>(mut self, ffmpeg: S) -> Self { self.ffmpeg = ffmpeg.into(); self } /// Set the indicatif [`MultiProgress`] used to show download progress /// for all downloads #[cfg(feature = "indicatif")] #[cfg_attr(docsrs, doc(cfg(feature = "indicatif")))] #[must_use] pub fn multi_progress(mut self, progress: MultiProgress) -> Self { self.multi = Some(progress); self } /// Set the indicatif [`ProgressStyle`] for the progress bars displayed under `multi_progress` #[cfg(feature = "indicatif")] #[cfg_attr(docsrs, doc(cfg(feature = "indicatif")))] #[must_use] pub fn progress_style(mut self, style: ProgressStyle) -> Self { self.progress_style = Some(style); self } /// Set the default [`StreamFilter`] for all downloads. /// /// The filter can be overridden for individual download queries. #[must_use] pub fn stream_filter(mut self, filter: StreamFilter) -> Self { self.filter = filter; self } /// Set the [`DownloadVideoFormat`] of downloaded videos #[must_use] pub fn video_format(mut self, video_format: DownloadVideoFormat) -> Self { self.video_format = video_format; self } /// Set the number of retries in case a download fails with a 403 error #[must_use] pub fn n_retries(mut self, n_retries: u32) -> Self { self.n_retries = n_retries; self } /// Enable path precheck /// /// The downloader will check if the destination path /// (predicted from the entity to download and the StreamFilter) exists and /// skips the download with [`DownloadError::Exists`] without fetching any player data. /// /// This allows fast resumption of playlist downloads. 
#[must_use] pub fn path_precheck(mut self) -> Self { self.path_precheck = true; self } /// Enable audio tagging #[cfg(feature = "audiotag")] #[cfg_attr(docsrs, doc(cfg(feature = "audiotag")))] #[must_use] pub fn audio_tag(mut self) -> Self { self.audio_tag = true; self } /// Crop YouTube thumbnails to get square album covers #[cfg(feature = "audiotag")] #[cfg_attr(docsrs, doc(cfg(feature = "audiotag")))] #[must_use] pub fn crop_cover(mut self) -> Self { self.crop_cover = true; self } /// Set the [`ClientType`] used to fetch the YT player #[must_use] pub fn client_type(mut self, client_type: ClientType) -> Self { self.client_types = Some(vec![client_type]); self } /// Set a list of client types used to fetch the YT player /// /// The clients are used in the given order. If a client cannot fetch the requested video, /// an attempt is made with the next one. #[must_use] pub fn client_types>>(mut self, client_types: T) -> Self { self.client_types = Some(client_types.into()); self } /// Create a new, configured [`Downloader`] instance pub fn build(self) -> Downloader { self.build_with_client( Client::builder() .timeout(Duration::from_secs(20)) .build() .expect("http client"), ) } /// Create a new, configured [`Downloader`] instance using a custom Reqwest [`Client`] pub fn build_with_client(self, http_client: Client) -> Downloader { Downloader { i: Arc::new(DownloaderInner { rp: self.rp.unwrap_or_default(), http: http_client, ffmpeg: self.ffmpeg, #[cfg(feature = "indicatif")] multi: self.multi, #[cfg(feature = "indicatif")] progress_style: self.progress_style.unwrap_or_else(|| { ProgressStyle::with_template("{msg}\n{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})") .unwrap() .progress_chars("#>-") }), filter: self.filter, video_format: self.video_format, n_retries: self.n_retries, path_precheck: self.path_precheck, #[cfg(feature = "audiotag")] audio_tag: self.audio_tag, #[cfg(feature = "audiotag")] crop_cover: 
self.crop_cover, client_types: self.client_types, }), } } } impl Default for Downloader { fn default() -> Self { DownloaderBuilder::new().build() } } impl Downloader { /// Create a new [`Downloader`] using the given [`RustyPipe`] instance pub fn new(rp: &RustyPipe) -> Self { DownloaderBuilder::new().rustypipe(rp).build() } /// Create a new [`DownloaderBuilder`] /// /// This is the same as [`DownloaderBuilder::new`] pub fn builder() -> DownloaderBuilder { DownloaderBuilder::default() } fn query(&self, video: DownloadVideo) -> DownloadQuery { DownloadQuery { dl: self.clone(), video, dest: DownloadDest::Default, #[cfg(feature = "indicatif")] progress: None, filter: None, video_format: None, client_types: None, } } /// Download a video with the given ID #[must_use] pub fn id>(&self, video_id: S) -> DownloadQuery { self.query(DownloadVideo { id: video_id.into(), ..Default::default() }) } /// Download a video from a DownloadVideo object #[must_use] pub fn video(&self, video: DownloadVideo) -> DownloadQuery { self.query(video) } /// Download a video from a [`YtEntity`] object (e.g. playlist/channel video) /// /// Providing an entity has the advantage that the download path can be determined before the video /// is fetched, so already downloaded videos get skipped right away. #[must_use] pub fn entity(&self, video: &impl YtEntity) -> DownloadQuery { self.query(DownloadVideo::from_entity(video)) } /// Download a video from a [`TrackItem`] (YouTube Music album/playlist item) /// /// Providing an entity has the advantage that the download path can be determined before the video /// is fetched, so already downloaded videos get skipped right away. 
/// /// If an album track is downloaded, this method will also add the track number to the downloaded file #[must_use] pub fn track(&self, track: &TrackItem) -> DownloadQuery { self.query(DownloadVideo::from_track(track)) } } /// Output data from downloading a video pub struct DownloadResult { /// Download destination path pub dest: PathBuf, /// Fetched vvideo player data pub player_data: VideoPlayer, } impl DownloadQuery { /// Update the video format from the given path extension /// /// The video format is not updated if it was already manually set fn update_video_format(&mut self, path: &Path) { if self.video_format.is_none() { self.video_format = path .extension() .and_then(|ext| ext.to_str()) .and_then(DownloadVideoFormat::from_extension); } } /// Download to the given file /// /// Note that the file extension may be changed to fit the reuested video/audio format. /// Refer to the [`DownloadResult`] to get the actual path after downloading. #[must_use] pub fn to_file>(mut self, file: P) -> Self { let file = file.into(); self.update_video_format(&file); self.dest = DownloadDest::File(file); self } /// Download to the given directory /// /// The filename is created by this template: `{track} {title} [{id}]`. /// /// You can use a custom filename template using [`DownloadQuery::to_template`] #[must_use] pub fn to_dir>(mut self, dir: P) -> Self { self.dest = DownloadDest::Dir(dir.into()); self } /// Download to a path determined by a template /// /// Templates are paths that may contain variables for video metadata. /// /// ## Variables /// - `{id}` Video ID /// - `{title}` Video title /// - `{channel}` Channel name /// - `{channel_id}` Channel ID /// - `{album}` Album /// - `{album_id}` Album ID /// - `{track}` Track number /// /// Whitespace between template variables is automatically removed if a variable /// contains no data (e.g. `{track} {name}` is equal to `{name}` if a video without /// track number is downloaded). 
/// /// Note that the file extension may be changed to fit the reuested video/audio format. /// Refer to the [`DownloadResult`] to get the actual path after downloading. #[must_use] pub fn to_template>(mut self, tmpl: P) -> Self { let tmpl = tmpl.into(); self.update_video_format(&tmpl); self.dest = DownloadDest::Template(tmpl); self } /// Show the progress of this download using a Indicatif [`ProgressBar`] #[cfg(feature = "indicatif")] #[cfg_attr(docsrs, doc(cfg(feature = "indicatif")))] #[must_use] pub fn progress_bar(mut self, progress: ProgressBar) -> Self { self.progress = Some(progress); self } /// Set a [`StreamFilter`] for choosing a stream to be downloaded #[must_use] pub fn stream_filter(mut self, filter: StreamFilter) -> Self { self.filter = Some(filter); self } /// Set the [`DownloadVideoFormat`] of downloaded videos #[must_use] pub fn video_format(mut self, video_format: DownloadVideoFormat) -> Self { self.video_format = Some(video_format); self } /// Set the [`ClientType`] used to fetch the YT player #[must_use] pub fn client_type(mut self, client_type: ClientType) -> Self { self.client_types = Some(vec![client_type]); self } /// Set a list of client types used to fetch the YT player /// /// The clients are used in the given order. If a client cannot fetch the requested video, /// an attempt is made with the next one. #[must_use] pub fn client_types>>(mut self, client_types: T) -> Self { self.client_types = Some(client_types.into()); self } /// Download the video /// /// If no download path is set, the video is downloaded to the current directory /// with a filename created by this template: `{track} {title} [{id}]`. 
#[tracing::instrument(skip(self), level="error", fields(id = self.video.id))] pub async fn download(&self) -> Result { let mut last_err = None; let mut failed_client = None; // Progress bar #[cfg(feature = "indicatif")] let pb = match &self.progress { Some(progress) => Some(progress.clone()), None => self.dl.i.multi.clone().map(|m| { let pb = ProgressBar::new(1); pb.set_style(self.dl.i.progress_style.clone()); m.add(pb) }), }; for n in 0..=self.dl.i.n_retries { let err = match self .download_attempt( n, failed_client, #[cfg(feature = "indicatif")] &pb, ) .await { Ok(res) => return Ok(res), Err(DownloadError::Forbidden(c, vd)) => { failed_client = Some(c); DownloadError::Forbidden(c, vd) } Err(DownloadError::Http(e)) => { if !e.is_timeout() { return Err(DownloadError::Http(e)); } DownloadError::Http(e) } Err(e) => return Err(e), }; if n != self.dl.i.n_retries { tracing::warn!("Retry attempt #{}. Error: {}", n + 1, err); tokio::time::sleep(Duration::from_secs(1)).await; } last_err = Some(err); } Err(last_err.unwrap()) } async fn download_attempt( &self, #[allow(unused_variables)] n: u32, failed_client: Option, #[cfg(feature = "indicatif")] pb: &Option, ) -> Result { let filter = self.filter.as_ref().unwrap_or(&self.dl.i.filter); let video_format = self.video_format.unwrap_or(self.dl.i.video_format); // Check if already downloaded if self.video.name.is_some() && self.dl.i.path_precheck { let op = self.dest.get_dest_path(&self.video); if filter.is_video_none() { for ext in ["m4a", "opus"] { let p = op.with_extension(ext); if p.is_file() { return Err(DownloadError::Exists(p)); } } } else { let p = op.with_extension(video_format.extension()); if p.is_file() { return Err(DownloadError::Exists(p)); } } } #[cfg(feature = "indicatif")] let attempt_suffix = if n > 0 { format!(" (retry #{n})") } else { String::new() }; #[cfg(feature = "indicatif")] if let Some(pb) = pb { if let Some(n) = &self.video.name { pb.set_message(format!("Fetching player data for 
{n}{attempt_suffix}")); } else { pb.set_message(format!("Fetching player data{attempt_suffix}")); } } let q = self.dl.i.rp.query(); let mut client_types = Cow::Borrowed( self.client_types .as_ref() .or(self.dl.i.client_types.as_ref()) .map(Vec::as_slice) .unwrap_or(q.player_client_order()), ); // If the last download failed, try another client if possible if let Some(failed_client) = failed_client { if let Some(pos) = client_types.iter().position(|c| c == &failed_client) { let p2 = pos + 1; if p2 < client_types.len() { let mut v = client_types[p2..].to_vec(); v.extend(&client_types[..p2]); client_types = v.into(); } } } let player_data = q.player_from_clients(&self.video.id, &client_types).await?; let user_agent = q.user_agent(player_data.client_type); // Select streams to download let (video, audio) = player_data.select_video_audio_stream(filter); if video.is_none() && audio.is_none() { if player_data.drm.is_some() { return Err(DownloadError::Source("video is DRM-protected".into())); } return Err(DownloadError::Source("no stream found".into())); } let extension = match video { Some(_) => video_format.extension(), None => match audio { Some(audio) => match audio.codec { AudioCodec::Mp4a => "m4a", AudioCodec::Opus => "opus", AudioCodec::Ac3 => "ac3", AudioCodec::Ec3 => "eac3", _ => return Err(DownloadError::Source("unknown audio codec".into())), }, None => unreachable!(), }, }; let (name, details) = match &player_data.details.name { Some(n) => (n.to_owned(), None), None => { let details = self.dl.i.rp.query().video_details(&self.video.id).await?; (details.name.to_owned(), Some(details)) } }; let pv = DownloadVideo { id: player_data.details.id.to_owned(), name: Some(name.to_owned()), channel_id: Some(player_data.details.channel_id.to_owned()), channel_name: player_data .details .channel_name .clone() .or(details.as_ref().map(|d| d.channel.name.to_owned())), album_id: self.video.album_id.to_owned(), album_name: self.video.album_name.to_owned(), track_nr: 
self.video.track_nr, }; let output_path = self.dest.get_dest_path(&pv).with_extension(extension); if output_path.exists() { return Err(DownloadError::Exists(output_path)); } if let Some(parent) = output_path.parent() { std::fs::create_dir_all(parent)?; } let mut downloads: Vec = Vec::new(); if let Some(v) = video { downloads.push(StreamDownload { file: output_path.with_extension(format!("video{}", v.format.extension())), url: v.url.clone(), video_codec: Some(v.codec), audio_codec: None, }); } if let Some(a) = audio { downloads.push(StreamDownload { file: output_path.with_extension(format!("audio{}", a.format.extension())), url: a.url.clone(), video_codec: None, audio_codec: Some(a.codec), }); } #[cfg(feature = "indicatif")] if let Some(pb) = pb { pb.set_message(format!("Downloading {name}{attempt_suffix}")) } let downloads = download_streams( downloads, &self.dl.i.http, &user_agent, #[cfg(feature = "indicatif")] pb.clone(), ) .await .map_err(|e| { if let DownloadError::Http(e) = &e { if e.status() == Some(StatusCode::FORBIDDEN) { // 403 errors may occur due to bad visitor data IDs if let Some(vd) = &player_data.visitor_data { q.remove_visitor_data(vd); } return DownloadError::Forbidden( player_data.client_type, player_data.visitor_data.clone(), ); } } e })?; #[cfg(feature = "indicatif")] if let Some(pb) = &pb { pb.set_message(format!("Converting {name}")); pb.set_style( ProgressStyle::with_template("{msg}\n{spinner:.green} [{elapsed_precise}]") .unwrap(), ); pb.enable_steady_tick(Duration::from_millis(500)); } convert_streams(&downloads, &output_path, &self.dl.i.ffmpeg, &name).await?; // Tag audio file #[cfg(feature = "audiotag")] if self.dl.i.audio_tag && video.is_none() && matches!(extension, "m4a" | "opus") { let (details, track) = match details { Some(d) => (d, self.dl.i.rp.query().music_details(&self.video.id).await?), None => { let q = self.dl.i.rp.query(); tokio::try_join!( q.video_details(&self.video.id), q.music_details(&self.video.id) )? 
} }; self.apply_audio_tags( &output_path, details, &player_data.details, track.track, pv.track_nr, ) .await?; } #[cfg(feature = "indicatif")] if let Some(pb) = pb { pb.disable_steady_tick(); } // Delete original files for d in &downloads { fs::remove_file(&d.file).await?; } #[cfg(feature = "indicatif")] if let Some(pb) = pb { pb.finish_and_clear(); } Ok(DownloadResult { dest: output_path, player_data, }) } #[cfg(feature = "audiotag")] async fn apply_audio_tags( &self, file: &Path, details: VideoDetails, player_details: &VideoPlayerDetails, track: TrackItem, track_nr: Option, ) -> Result<()> { use std::{io::Cursor, num::NonZeroU32}; let mut tagged_file = lofty::read_from_path(file)?; let tag = match tagged_file.primary_tag_mut() { Some(primary_tag) => primary_tag, None => { if let Some(first_tag) = tagged_file.first_tag_mut() { first_tag } else { let tag_type = tagged_file.primary_tag_type(); tagged_file.insert_tag(Tag::new(tag_type)); tagged_file.primary_tag_mut().unwrap() } } }; let description = details.description.to_plaintext(); tag.set_album( track .album .map(|b| b.name) .unwrap_or_else(|| track.name.clone()), ); tag.set_artist( track .artists .into_iter() .next() .map(|a| a.name) .unwrap_or(details.channel.name), ); tag.set_title(track.name); if let Some(release_date) = extract_yt_release_date(&description, details.publish_date) { if let Ok(date_str) = release_date.format(&YMD_FORMAT) { tag.insert_text(ItemKey::RecordingDate, date_str); } } tag.set_comment(description); if let Some(track_nr) = track_nr { tag.set_track(track_nr.into()); } // For YTM tracks the music details contain a high quality, square cover image, but for music videos // the cover images are cropped and of worse resolution. // Therefore we switch to the thumbnails from the player data if the music details contain no square // thumbnails. 
let thumbnail_music = track.cover.into_iter().max_by_key(|c| c.height); let thumbnail = if thumbnail_music .as_ref() .map(|tn| tn.height == tn.width) .unwrap_or_default() { thumbnail_music } else { let thumbnail_player = player_details .thumbnail .iter() .max_by_key(|c| c.height) .cloned(); thumbnail_player.or(thumbnail_music) }; if let Some(thumbnail) = thumbnail { // Attempt to get the higher resolution, uncropped maxresdefault.jpg thumbnail if available let mut resp = None; if thumbnail.height != thumbnail.width { if let Ok(x) = self .dl .i .http .get(format!( "https://i.ytimg.com/vi/{}/maxresdefault.jpg", track.id )) .send() .await? .error_for_status() { resp = Some(x); } } let resp = match resp { Some(resp) => resp, None => self .dl .i .http .get(thumbnail.url) .send() .await? .error_for_status()?, }; let img_type = resp .headers() .get(header::CONTENT_TYPE) .and_then(|fmt| fmt.to_str().ok()) .and_then(image::ImageFormat::from_mime_type); let img_bts = resp.bytes().await?; let mut lofty_img = if self.dl.i.crop_cover { // Crop cover image if it is not square if thumbnail.height != thumbnail.width { let mut img = if let Some(fmt) = img_type { image::load_from_memory_with_format(&img_bts, fmt)? } else { image::load_from_memory(&img_bts)? }; let crop = smartcrop::find_best_crop(&img, NonZeroU32::MIN, NonZeroU32::MIN) .map_err(|e| DownloadError::AudioTag(format!("image crop: {e}").into()))? .crop; img = img.crop_imm(crop.x, crop.y, crop.width, crop.height); let mut enc_bts = Vec::new(); img.write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality( &mut enc_bts, 90, ))?; let mut rd = Cursor::new(enc_bts); Picture::from_reader(&mut rd)? } else { let mut rd = Cursor::new(img_bts); Picture::from_reader(&mut rd)? } } else { let mut rd = Cursor::new(img_bts); Picture::from_reader(&mut rd)? 
}; lofty_img.set_pic_type(lofty::picture::PictureType::CoverFront); tag.set_picture(0, lofty_img); } tag.save_to_path(file, WriteOptions::default())?; Ok(()) } } fn get_download_range(offset: u64, size: Option) -> Range { let mut rng = rand::thread_rng(); let chunk_size = rng.gen_range(CHUNK_SIZE_MIN..CHUNK_SIZE_MAX); let mut chunk_end = offset + chunk_size; if let Some(size) = size { chunk_end = chunk_end.min(size - 1); } Range { start: offset, end: chunk_end, } } fn parse_cr_header(cr_header: &str) -> Result<(u64, u64)> { static PATTERN: Lazy = Lazy::new(|| Regex::new(r"bytes (\d+)-(\d+)/(\d+)").unwrap()); let captures = PATTERN.captures(cr_header).ok_or_else(|| { DownloadError::Progressive( format!("Content-Range header '{cr_header}' does not match pattern").into(), ) })?; Ok(( captures.get(2).unwrap().as_str().parse().map_err(|_| { DownloadError::Progressive("could not parse range header number".into()) })?, captures.get(3).unwrap().as_str().parse().map_err(|_| { DownloadError::Progressive("could not parse range header number".into()) })?, )) } fn filenamify_lim(name: &str) -> String { let lim = 200; let n = filenamify::filenamify(name); if n.len() > lim { n.char_indices() .take_while(|(i, _)| i < &lim) .map(|(_, c)| c) .collect::() } else { n } } async fn download_single_file( url: &str, output: &Path, http: &Client, user_agent: &str, #[cfg(feature = "indicatif")] pb: Option, ) -> Result<()> { // Check if file is already downloaded let output_path: PathBuf = output.into(); if output_path.exists() { return Ok(()); } let mut extension = OsString::from(output_path.extension().unwrap_or_default()); extension.push(".part"); let output_path_tmp = output_path.with_extension(extension); let mut offset: u64 = 0; let mut size: Option = None; // If the url is from googlevideo, extract file size from clen parameter let (url_base, url_params) = util::url_to_params(url).map_err(|e| DownloadError::Other(e.to_string().into()))?; let is_gvideo = url_base .as_str() 
.ends_with(".googlevideo.com/videoplayback"); if is_gvideo { size = url_params.get("clen").and_then(|s| s.parse::().ok()); } // Check if file is partially downloaded if output_path_tmp.exists() { let file_size = output_path_tmp.metadata()?.len(); let res = http .head(url.to_owned()) .header(header::USER_AGENT, user_agent) .header(header::RANGE, "bytes=0-0") .send() .await? .error_for_status()?; let cr_header = res .headers() .get(header::CONTENT_RANGE) .ok_or(DownloadError::Progressive(Cow::Borrowed( "Did not get Content-Range header", )))? .to_str() .map_err(|_| { DownloadError::Progressive( "could not convert Content-Range header to string".into(), ) })?; let (_, original_size) = parse_cr_header(cr_header)?; match file_size.cmp(&original_size) { Ordering::Less => { // Partially downloaded size = Some(original_size); offset = file_size; #[cfg(feature = "indicatif")] if let Some(pb) = &pb { pb.inc_length(original_size); pb.inc(offset); } } Ordering::Equal => { // Already downloaded fs::rename(output_path_tmp, output_path).await?; return Ok(()); } Ordering::Greater => { // WTF? return Err(DownloadError::Other( format!( "Already downloaded file {} is larger than original", output_path_tmp.to_str().unwrap_or_default() ) .into(), )); } } } let mut file = fs::OpenOptions::new() .append(true) .create(true) .open(&output_path_tmp) .await?; let res = if is_gvideo && size.is_some() { download_chunks_by_param( http, &mut file, url, size.unwrap(), offset, user_agent, #[cfg(feature = "indicatif")] pb, ) .await } else { download_chunks_by_header( http, &mut file, url, size, offset, user_agent, #[cfg(feature = "indicatif")] pb, ) .await }; drop(file); if let Err(e) = res { // Remove temporary file if nothing was downloaded (e.g. 
403 error) if std::fs::metadata(&output_path_tmp) .map(|md| md.len() == 0) .unwrap_or_default() { _ = std::fs::remove_file(&output_path_tmp); } return Err(e); } fs::rename(&output_path_tmp, &output_path).await?; Ok(()) }

/// Download a stream in chunks using the HTTP `Range` request header.
///
/// This is the standardized method that works on all web servers,
/// but throttling has been observed when using it.
///
/// Received bytes are appended to `file`. Downloading starts at `offset` and
/// continues until the offset reaches the total size. If `size` is `None`,
/// the total size is taken from the first `Content-Range` response header
/// (and the progress bar length is increased by it).
///
/// # Errors
///
/// Returns [`DownloadError::Progressive`] if a response is empty (or has no
/// content length), or if the `Content-Range` header is missing or is not a
/// valid string. HTTP and I/O failures are propagated via `?`.
async fn download_chunks_by_header(
    http: &Client,
    file: &mut File,
    url: &str,
    size: Option<u64>,
    offset: u64,
    user_agent: &str,
    #[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<()> {
    let mut offset = offset;
    let mut size = size;

    loop {
        let range = get_download_range(offset, size);
        tracing::debug!("Fetching range {}-{}", range.start, range.end);

        let res = http
            .get(url)
            .header(header::USER_AGENT, user_agent)
            .header(header::ORIGIN, "https://www.youtube.com")
            .header(header::REFERER, "https://www.youtube.com/")
            .header(
                header::RANGE,
                format!("bytes={}-{}", range.start, range.end),
            )
            .send()
            .await?
            .error_for_status()?;

        if res.content_length().unwrap_or_default() == 0 {
            return Err(DownloadError::Progressive(
                format!("empty chunk {}-{}", range.start, range.end).into(),
            ));
        }

        // Content-Range: bytes 0-100/451368980
        let cr_header = res
            .headers()
            .get(header::CONTENT_RANGE)
            .ok_or(DownloadError::Progressive(Cow::Borrowed(
                "Did not get Content-Range header",
            )))?
            .to_str()
            .map_err(|_| {
                DownloadError::Progressive(
                    "could not convert Content-Range header to string".into(),
                )
            })?;

        // NOTE(review): `parsed_offset` is presumably the last byte position of the
        // returned range, so the next request starts one byte after it. Confirm
        // against `parse_cr_header`.
        let (parsed_offset, parsed_size) = parse_cr_header(cr_header)?;
        offset = parsed_offset + 1;

        // Resolve the total size once; afterwards it is always known, so the loop
        // exit check below needs no `unwrap`.
        let total = match size {
            Some(total) => total,
            None => {
                size = Some(parsed_size);
                #[cfg(feature = "indicatif")]
                if let Some(pb) = &pb {
                    pb.inc_length(parsed_size);
                }
                parsed_size
            }
        };

        tracing::debug!("Retrieving chunks...");
        let mut stream = res.bytes_stream();
        while let Some(item) = stream.next().await {
            // Retrieve chunk.
            let mut chunk = item?;
            #[cfg(feature = "indicatif")]
            if let Some(pb) = &pb {
                pb.inc(chunk.len() as u64);
            }
            file.write_all_buf(&mut chunk).await?;
        }

        if offset >= total {
            break;
        }
    }
    Ok(())
}

/// Download a stream in chunks using the `range` URL parameter.
///
/// This is used by YouTube's web player. The file size
/// must be known beforehand (it is included in the stream url).
///
/// Received bytes are appended to `file`. Downloading starts at `offset` and
/// the offset is advanced by the reported content length of every response
/// until it reaches `size`. The progress bar length is increased by `size`
/// before the first request.
///
/// # Errors
///
/// Returns [`DownloadError::Progressive`] if the URL cannot be parsed or if a
/// response is empty (or has no content length). HTTP and I/O failures are
/// propagated via `?`.
#[allow(clippy::too_many_arguments)]
async fn download_chunks_by_param(
    http: &Client,
    file: &mut File,
    url: &str,
    size: u64,
    offset: u64,
    user_agent: &str,
    #[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<()> {
    let mut offset = offset;

    #[cfg(feature = "indicatif")]
    if let Some(pb) = &pb {
        pb.inc_length(size);
    }

    loop {
        let range = get_download_range(offset, Some(size));
        tracing::debug!("Fetching range {}-{}", range.start, range.end);

        let urlp =
            Url::parse_with_params(url, [("range", &format!("{}-{}", range.start, range.end))])
                .map_err(|e| DownloadError::Progressive(format!("url parsing: {e}").into()))?;

        let res = http
            .get(urlp)
            .header(header::USER_AGENT, user_agent)
            .header(header::ORIGIN, "https://www.youtube.com")
            .header(header::REFERER, "https://www.youtube.com/")
            .send()
            .await?
            .error_for_status()?;

        // A missing Content-Length is treated like an empty response.
        let clen = res.content_length().unwrap_or_default();
        if clen == 0 {
            return Err(DownloadError::Progressive(
                format!("empty chunk {}-{}", range.start, range.end).into(),
            ));
        }

        let mut stream = res.bytes_stream();
        while let Some(item) = stream.next().await {
            // Retrieve chunk.
            let mut chunk = item?;
            #[cfg(feature = "indicatif")]
            if let Some(pb) = &pb {
                pb.inc(chunk.len() as u64);
            }
            file.write_all_buf(&mut chunk).await?;
        }

        offset += clen;
        tracing::debug!("offset inc by {}, new: {}", clen, offset);

        if offset >= size {
            break;
        }
    }
    Ok(())
}

/// A single stream that is downloaded to a local file.
#[allow(dead_code)]
struct StreamDownload {
    /// Local path the stream is written to (later passed to ffmpeg as input)
    file: PathBuf,
    /// URL the stream is fetched from
    url: String,
    /// Audio codec of the stream, if any (not read within this part of the file)
    audio_codec: Option<AudioCodec>,
    /// Video codec of the stream, if any (not read within this part of the file)
    video_codec: Option<VideoCodec>,
}

/// Download all given streams, running at most two downloads concurrently.
///
/// Every download is delegated to `download_single_file`. On success the
/// unchanged list of downloads is handed back to the caller; the first
/// failing download aborts the operation and its error is returned.
async fn download_streams(
    downloads: Vec<StreamDownload>,
    http: &Client,
    user_agent: &str,
    #[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<Vec<StreamDownload>> {
    stream::iter(downloads.iter().map(Ok))
        .try_for_each_concurrent(2, |d| {
            // Each concurrent download gets its own handle to the progress bar.
            #[cfg(feature = "indicatif")]
            let pb = pb.clone();
            async move {
                download_single_file(
                    &d.url,
                    &d.file,
                    http,
                    user_agent,
                    #[cfg(feature = "indicatif")]
                    pb,
                )
                .await
            }
        })
        .await?;

    Ok(downloads)
}

/// Merge the downloaded streams into a single output file using ffmpeg.
///
/// The resulting command line is
/// `ffmpeg -i <file>... -map <index>... -c copy -metadata title=<title> <output>`,
/// i.e. all inputs are mapped into the output and copied without re-encoding.
///
/// # Errors
///
/// Returns [`DownloadError::Ffmpeg`] containing ffmpeg's stderr output if the
/// process exits unsuccessfully. Failure to run the process is propagated
/// via `?`.
async fn convert_streams(
    downloads: &[StreamDownload],
    output: &Path,
    ffmpeg: &str,
    title: &str,
) -> Result<()> {
    let mut args: Vec<OsString> = Vec::with_capacity(downloads.len() * 4 + 5);

    // All input files first ...
    for d in downloads {
        args.push(OsString::from("-i"));
        args.push(d.file.as_os_str().to_owned());
    }
    // ... followed by one mapping per input index.
    for i in 0..downloads.len() {
        args.push(OsString::from("-map"));
        args.push(OsString::from(i.to_string()));
    }

    args.push(OsString::from("-c"));
    args.push(OsString::from("copy"));
    args.push(OsString::from("-metadata"));
    args.push(OsString::from(format!("title={title}")));
    args.push(output.as_os_str().to_owned());

    let res = Command::new(ffmpeg).args(args).output().await?;

    if !res.status.success() {
        // Decode lossily: a single invalid UTF-8 byte in ffmpeg's output (e.g. from a
        // file name) must not wipe out the whole diagnostic message.
        return Err(DownloadError::Ffmpeg(
            format!("ffmpeg error: {}", String::from_utf8_lossy(&res.stderr)).into(),
        ));
    }
    Ok(())
}

/// Date format used to parse release dates (`YYYY-MM-DD`)
#[cfg(feature = "audiotag")]
const YMD_FORMAT: &[time::format_description::FormatItem] =
    time::macros::format_description!("[year]-[month]-[day]");

/// Determine the release date of a track from its video description.
///
/// Looks for a `Released on: YYYY-MM-DD` note in `description`. If such a date
/// is found and parses successfully, it is returned unless `publish_date` lies
/// less than 4 days away from it, in which case the date of `publish_date` is
/// preferred. If no release date can be extracted, the date of `publish_date`
/// is returned (or `None` if that is not available either).
#[cfg(feature = "audiotag")]
fn extract_yt_release_date(
    description: &str,
    publish_date: Option<OffsetDateTime>,
) -> Option<Date> {
    static RELEASE_DATE_REGEX: Lazy<Regex> =
        Lazy::new(|| Regex::new(r"Released on: (\d{4}-\d{2}-\d{2})").unwrap());

    let release_date = RELEASE_DATE_REGEX
        .captures(description)
        .and_then(|cap| Date::parse(&cap[1], YMD_FORMAT).ok());
    let upload_date = publish_date.map(|d| d.date());

    match (release_date, upload_date) {
        // Prefer the video upload date if it lies within 4 days of the release date
        (Some(release), Some(upload))
            if (upload - release).abs() < time::Duration::days(4) =>
        {
            Some(upload)
        }
        (Some(release), _) => Some(release),
        (None, upload) => upload,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn template() {
        let dest =
            DownloadDest::Template(PathBuf::from("{channel}/{album}/{track} {title} [{id}]"));

        // Music track: all template fields are available
        let track_path = dest.get_dest_path(&DownloadVideo {
            id: "a3Fo1vYyiDw".to_owned(),
            name: Some("Volle Kraft voraus".to_owned()),
            channel_id: Some("UCE7_p3lcXA-YXRZp2PjrgYw".to_owned()),
            channel_name: Some("Helene Fischer".to_owned()),
            album_id: Some("MPREb_O2gXCdCVGsZ".to_owned()),
            album_name: Some("Rausch (Deluxe)".to_owned()),
            track_nr: Some(1),
        });
        assert_eq!(
            track_path.to_str().unwrap(),
            "Helene Fischer/Rausch (Deluxe)/01 Volle Kraft voraus [a3Fo1vYyiDw]"
        );

        // Regular video: no album and no track number
        let video_path = dest.get_dest_path(&DownloadVideo {
            id: "5en96GIijXk".to_owned(),
            name: Some("a pretty cloud, and a happy duck".to_owned()),
            channel_id: Some("UCl2mFZoRqjw_ELax4Yisf6w".to_owned()),
            channel_name: Some("Louis Rossmann".to_owned()),
            album_id: None,
            album_name: None,
            track_nr: None,
        });
        assert_eq!(
            video_path.to_str().unwrap(),
            "Louis Rossmann/-/a pretty cloud, and a happy duck [5en96GIijXk]"
        );

        // Only the video id is known
        let ido_path = dest.get_dest_path(&DownloadVideo {
            id: "5en96GIijXk".to_owned(),
            name: None,
            channel_id: None,
            channel_name: None,
            album_id: None,
            album_name: None,
            track_nr: None,
        });
        assert_eq!(ido_path.to_str().unwrap(), "-/-/[5en96GIijXk]");
    }
}