This repository has been archived on 2026-05-27. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
rustypipe/src/download.rs

506 lines
15 KiB
Rust

//! YouTube audio/video downloader
use std::{borrow::Cow, cmp::Ordering, ffi::OsString, ops::Range, path::PathBuf};
use fancy_regex::Regex;
use futures::stream::{self, StreamExt};
use indicatif::ProgressBar;
use log::{debug, info};
use once_cell::sync::Lazy;
use rand::Rng;
use reqwest::{header, Client};
use tokio::{
fs::{self, File},
io::AsyncWriteExt,
process::Command,
};
use crate::{
error::DownloadError,
model::{AudioCodec, FileFormat, VideoCodec, VideoPlayer},
param::StreamFilter,
util,
};
/// Result type used by the functions in this module, with [`DownloadError`] as the error type.
type Result<T> = core::result::Result<T, DownloadError>;
/// Lower bound (inclusive) of the randomly chosen chunk length, in bytes.
const CHUNK_SIZE_MIN: u64 = 9_000_000;
/// Upper bound (exclusive) of the randomly chosen chunk length, in bytes.
const CHUNK_SIZE_MAX: u64 = 10_000_000;
/// Pick the byte range of the next chunk to request, starting at `offset`.
///
/// The chunk length is chosen randomly from `CHUNK_SIZE_MIN..CHUNK_SIZE_MAX`.
/// The `end` of the returned range is treated by the callers as the index of the
/// last byte to request. If the total `size` is known, `end` is clamped to the
/// index of the last byte of the file (`size - 1`).
fn get_download_range(offset: u64, size: Option<u64>) -> Range<u64> {
    let chunk_size = rand::thread_rng().gen_range(CHUNK_SIZE_MIN..CHUNK_SIZE_MAX);
    let chunk_end = offset + chunk_size;

    let end = match size {
        // `saturating_sub` keeps a reported size of 0 from underflowing
        // (which would panic in debug builds and wrap around in release builds).
        Some(size) => chunk_end.min(size.saturating_sub(1)),
        None => chunk_end,
    };

    offset..end
}
/// Parse a `Content-Range` header value such as `bytes 0-100/451368980`.
///
/// Returns the pair `(range end, total size)`, i.e. the second and third number
/// of the header. The first number (range start) is not returned.
///
/// # Errors
/// Returns `DownloadError::Progressive` if the header does not match the expected
/// pattern or if one of the numbers cannot be parsed as `u64`.
fn parse_cr_header(cr_header: &str) -> Result<(u64, u64)> {
    static PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r#"bytes (\d+)-(\d+)/(\d+)"#).unwrap());

    // Shared conversion for both numbers taken from the header
    let to_number = |digits: &str| -> Result<u64> {
        digits
            .parse()
            .map_err(|_| DownloadError::Progressive("could not parse range header number".into()))
    };

    // A regex engine error is treated the same way as a non-matching header
    let groups = match PATTERN.captures(cr_header) {
        Ok(Some(groups)) => groups,
        _ => {
            return Err(DownloadError::Progressive(
                format!(
                    "Content-Range header '{}' does not match pattern",
                    cr_header
                )
                .into(),
            ))
        }
    };

    // Groups 2 and 3 always exist once the pattern has matched
    let range_end = to_number(groups.get(2).unwrap().as_str())?;
    let total_size = to_number(groups.get(3).unwrap().as_str())?;
    Ok((range_end, total_size))
}
/// Download the stream at `url` into the file `output`.
///
/// The data is written to a temporary file (the output path with `.part` appended
/// to its extension) which is renamed to `output` once the download is complete.
/// If the temporary file already exists, the download is resumed at its current length.
/// If `output` already exists, nothing is downloaded.
///
/// The length of `pb` is increased by the size of the stream as soon as it is known
/// and `pb` is advanced by the number of bytes that are (or already were) downloaded.
///
/// # Errors
/// Returns an error if the URL cannot be split into its parameters, a request fails,
/// the `Content-Range` header is missing or malformed, the partial file is larger than
/// the original, or a file system operation fails.
async fn download_single_file<P: Into<PathBuf>>(
    url: &str,
    output: P,
    http: Client,
    pb: ProgressBar,
) -> Result<()> {
    // Check if file is already downloaded
    let output_path: PathBuf = output.into();
    if output_path.exists() {
        return Ok(());
    }

    // Temporary file: same path with ".part" appended to the extension
    let mut extension = OsString::from(output_path.extension().unwrap_or_default());
    extension.push(".part");
    let output_path_tmp = output_path.with_extension(extension);

    let mut offset: u64 = 0;
    let mut size: Option<u64> = None;

    // If the url is from googlevideo, extract file size from clen parameter
    let (url_base, url_params) =
        util::url_to_params(url).map_err(|e| DownloadError::Other(e.to_string().into()))?;
    let is_gvideo = url_base
        .as_str()
        .ends_with(".googlevideo.com/videoplayback");
    if is_gvideo {
        size = url_params.get("clen").and_then(|s| s.parse::<u64>().ok());
    }

    // Check if file is partially downloaded
    if output_path_tmp.exists() {
        let file_size = output_path_tmp.metadata()?.len();

        // Ask for a single byte only; the total size is taken from the Content-Range header
        let res = http
            .head(url)
            .header(header::RANGE, "bytes=0-0")
            .send()
            .await?
            .error_for_status()?;

        let cr_header = res
            .headers()
            .get(header::CONTENT_RANGE)
            .ok_or(DownloadError::Progressive(Cow::Borrowed(
                "Did not get Content-Range header",
            )))?
            .to_str()
            .map_err(|_| {
                DownloadError::Progressive(
                    "could not convert Content-Range header to string".into(),
                )
            })?;
        let (_, original_size) = parse_cr_header(cr_header)?;

        match file_size.cmp(&original_size) {
            Ordering::Less => {
                // Partially downloaded
                size = Some(original_size);
                offset = file_size;
                // `download_chunks_by_param` (used for googlevideo URLs with a known size)
                // adds the stream size to the progress bar itself. Only add it here for
                // the header-based path, otherwise the length would be counted twice.
                if !is_gvideo {
                    pb.inc_length(original_size);
                }
                pb.inc(offset);
            }
            Ordering::Equal => {
                // Already downloaded
                fs::rename(output_path_tmp, output_path).await?;
                return Ok(());
            }
            Ordering::Greater => {
                // The partial file cannot belong to this stream
                return Err(DownloadError::Other(
                    format!(
                        "Already downloaded file {} is larger than original",
                        output_path_tmp.to_str().unwrap_or_default()
                    )
                    .into(),
                ));
            }
        }
    }

    let mut file = fs::OpenOptions::new()
        .append(true)
        .create(true)
        .open(&output_path_tmp)
        .await?;

    match size {
        Some(size) if is_gvideo => {
            download_chunks_by_param(http, &mut file, url, size, offset, pb).await?;
        }
        _ => {
            download_chunks_by_header(http, &mut file, url, size, offset, pb).await?;
        }
    }

    // Wait for pending writes (and surface their errors) before the file is
    // renamed and handed over to the caller.
    file.flush().await?;
    drop(file);

    fs::rename(output_path_tmp, output_path).await?;
    Ok(())
}
/// Download a stream in chunks using the HTTP `Range` request header.
///
/// This is the standardized method that works on all web servers,
/// but throttling has been observed when using it.
///
/// Starting at `offset`, chunks are requested and written to `file` until the end of the
/// last received range reaches the total size. If `size` is `None`, the total size is
/// taken from the first `Content-Range` response header and added to the length of `pb`.
/// `pb` is advanced by the number of bytes received.
///
/// # Errors
/// Returns an error if a request fails, the `Content-Range` header is missing or
/// malformed, or writing to `file` fails.
async fn download_chunks_by_header(
    http: Client,
    file: &mut File,
    url: &str,
    size: Option<u64>,
    offset: u64,
    pb: ProgressBar,
) -> Result<()> {
    let mut next_offset = offset;
    let mut known_size = size;

    loop {
        let range = get_download_range(next_offset, known_size);
        debug!("Fetching range {}-{}", range.start, range.end);

        let request = http
            .get(url.to_owned())
            .header(header::ORIGIN, "https://www.youtube.com")
            .header(header::REFERER, "https://www.youtube.com/")
            .header(
                header::RANGE,
                format!("bytes={}-{}", range.start, range.end),
            );
        let res = request.send().await?.error_for_status()?;

        // Example header: `Content-Range: bytes 0-100/451368980`
        let cr_value = res
            .headers()
            .get(header::CONTENT_RANGE)
            .ok_or(DownloadError::Progressive(Cow::Borrowed(
                "Did not get Content-Range header",
            )))?;
        let cr_header = cr_value.to_str().map_err(|_| {
            DownloadError::Progressive("could not convert Content-Range header to string".into())
        })?;
        let (range_end, reported_size) = parse_cr_header(cr_header)?;

        // The next chunk starts right after the last byte of this one
        next_offset = range_end + 1;

        // Learn the total size from the first response if it was not known beforehand
        let total_size = match known_size {
            Some(total) => total,
            None => {
                known_size = Some(reported_size);
                pb.inc_length(reported_size);
                reported_size
            }
        };

        debug!("Retrieving chunks...");
        let mut body = res.bytes_stream();
        while let Some(item) = body.next().await {
            let mut chunk = item?;
            pb.inc(chunk.len() as u64);
            file.write_all_buf(&mut chunk).await?;
        }

        if next_offset >= total_size {
            return Ok(());
        }
    }
}
// Use the `range` url parameter to download a stream in chunks.
// This ist used by YouTube's web player. The file size
// must be known beforehand (it is included in the stream url).
async fn download_chunks_by_param(
http: Client,
file: &mut File,
url: &str,
size: u64,
offset: u64,
pb: ProgressBar,
) -> Result<()> {
let mut offset = offset;
pb.inc_length(size);
loop {
let range = get_download_range(offset, Some(size));
debug!("Fetching range {}-{}", range.start, range.end);
let res = http
.get(format!("{}&range={}-{}", url, range.start, range.end))
.header(header::ORIGIN, "https://www.youtube.com")
.header(header::REFERER, "https://www.youtube.com/")
.send()
.await?
.error_for_status()?;
let clen = res.content_length().unwrap();
debug!("Retrieving chunks...");
let mut stream = res.bytes_stream();
while let Some(item) = stream.next().await {
// Retrieve chunk.
let mut chunk = item?;
pb.inc(chunk.len() as u64);
file.write_all_buf(&mut chunk).await?;
}
offset += clen;
debug!("offset inc by {}, new: {}", clen, offset);
if offset >= size {
break;
}
}
Ok(())
}
/// A single stream that is downloaded to a local file as part of a video download.
#[allow(dead_code)]
struct StreamDownload {
    /// URL the stream is downloaded from
    url: String,
    /// Path of the local file the stream is written to
    file: PathBuf,
    // TODO: add `track_name: String` for multiple audio languages
    /// Codec of the video stream (`None` for audio streams)
    video_codec: Option<VideoCodec>,
    /// Codec of the audio stream (`None` for video streams)
    audio_codec: Option<AudioCodec>,
}
/// Download a video (and/or its audio) to `output_dir`.
///
/// The streams are selected from `player_data` using `filter`. If only a video stream
/// is selected it is downloaded directly to the output file. Otherwise the selected
/// streams are downloaded to temporary files next to the output file, converted into
/// the output file using the `ffmpeg` executable and removed afterwards.
///
/// - `output_fname`: file name without extension. Defaults to `<title> [<video id>]`
///   (sanitized to a valid file name).
/// - `output_format`: file extension of the output file. Defaults to `mp4` if a video
///   stream is selected, otherwise to `m4a`/`opus` depending on the audio codec.
/// - `pb`: progress bar that receives status messages and download progress; it is
///   finished and cleared on success.
///
/// If the output file already exists, an error is returned when `output_fname` was
/// given explicitly; otherwise the download is skipped.
///
/// # Errors
/// Returns `DownloadError::Input` if no stream matches the filter, if the default
/// format cannot be derived because the audio codec is unknown, or if an explicitly
/// named output file already exists. Errors from downloading, converting and removing
/// the temporary files are passed on.
#[allow(clippy::too_many_arguments)]
pub async fn download_video(
    player_data: &VideoPlayer,
    output_dir: &str,
    output_fname: Option<String>,
    output_format: Option<String>,
    filter: &StreamFilter,
    ffmpeg: &str,
    http: Client,
    pb: ProgressBar,
) -> Result<()> {
    // Download filepath
    let download_dir = PathBuf::from(output_dir);
    let title = player_data.details.title.to_owned();
    let output_fname_set = output_fname.is_some();
    let output_fname = output_fname.unwrap_or_else(|| {
        filenamify::filenamify(format!("{} [{}]", title, player_data.details.id))
    });

    // Select streams to download
    let (video, audio) = player_data.select_video_audio_stream(filter);
    if video.is_none() && audio.is_none() {
        return Err(DownloadError::Input("no stream found".into()));
    }

    // Only derive a default format (and fail on an unknown codec) if none was requested
    let format = match output_format {
        Some(format) => format,
        None => {
            let default_format = match video {
                Some(_) => "mp4",
                None => match audio {
                    Some(audio) => match audio.codec {
                        AudioCodec::Unknown => {
                            return Err(DownloadError::Input("unknown audio codec".into()))
                        }
                        AudioCodec::Mp4a => "m4a",
                        AudioCodec::Opus => "opus",
                    },
                    // At least one stream is present (checked above)
                    None => unreachable!(),
                },
            };
            default_format.to_owned()
        }
    };

    let output_path = if output_fname_set {
        // Explicit file name: an extension that is part of the name is replaced
        download_dir.join(&output_fname).with_extension(&format)
    } else {
        // Generated file name: append the extension. `with_extension` would cut the
        // name at the last dot of the title (e.g. "Foo vs. Bar [id]" -> "Foo vs.mp4").
        download_dir.join(format!("{}.{}", output_fname, format))
    };

    if output_path.exists() {
        // If the downloaded video already exists, only error if the download path was
        // chosen explicitly.
        if output_fname_set {
            return Err(DownloadError::Input(
                format!("File {} already exists", output_path.to_string_lossy()).into(),
            ));
        }
        info!(
            "Downloaded video {} already exists",
            output_path.to_string_lossy()
        );
        return Ok(());
    }

    match (video, audio) {
        // Only a video stream was selected: download it directly (no conversion)
        (Some(video), None) => {
            pb.set_message(format!("Downloading {}", title));
            download_single_file(&video.url, output_path, http, pb.clone()).await?;
        }
        // Downloading split video/audio streams (requires conversion with ffmpeg)
        _ => {
            let mut downloads: Vec<StreamDownload> = Vec::new();

            if let Some(v) = video {
                downloads.push(StreamDownload {
                    file: download_dir.join(format!(
                        "{}.video{}",
                        output_fname,
                        v.format.extension()
                    )),
                    url: v.url.to_owned(),
                    video_codec: Some(v.codec),
                    audio_codec: None,
                });
            }
            if let Some(a) = audio {
                downloads.push(StreamDownload {
                    file: download_dir.join(format!(
                        "{}.audio{}",
                        output_fname,
                        a.format.extension()
                    )),
                    url: a.url.to_owned(),
                    video_codec: None,
                    audio_codec: Some(a.codec),
                })
            }

            pb.set_message(format!("Downloading {}", title));
            download_streams(&downloads, http, pb.clone()).await?;

            pb.set_message(format!("Converting {}", title));
            convert_streams(&downloads, output_path, ffmpeg).await?;

            // Delete original files. All removals are attempted; the first error (if any)
            // is returned afterwards.
            stream::iter(&downloads)
                .map(|d| fs::remove_file(d.file.to_owned()))
                .buffer_unordered(downloads.len())
                .collect::<Vec<_>>()
                .await
                .into_iter()
                .collect::<core::result::Result<(), _>>()?;
        }
    }

    pb.finish_and_clear();
    Ok(())
}
/// Download all given streams concurrently to their target files.
///
/// Every download is run to completion; afterwards the first error (if any) is returned.
/// All downloads report their progress to the same progress bar `pb`.
async fn download_streams(
    downloads: &[StreamDownload],
    http: Client,
    pb: ProgressBar,
) -> Result<()> {
    // Nothing to do; this also avoids calling `buffer_unordered` with a limit of 0
    if downloads.is_empty() {
        return Ok(());
    }

    stream::iter(downloads)
        .map(|d| download_single_file(&d.url, d.file.to_owned(), http.clone(), pb.clone()))
        .buffer_unordered(downloads.len())
        .collect::<Vec<_>>()
        .await
        .into_iter()
        .collect::<Result<Vec<_>>>()?;
    Ok(())
}
/// Combine the downloaded stream files into the file `output` by running `ffmpeg`.
///
/// Every file in `downloads` is passed as an input (`-i`) and mapped to the output
/// (`-map <index>`). If there is more than one input, the codecs are kept (`-c copy`).
///
/// # Errors
/// Returns an error if the `ffmpeg` executable cannot be run, or
/// `DownloadError::Ffmpeg` containing its stderr output if it exits unsuccessfully.
async fn convert_streams<P: Into<PathBuf>>(
    downloads: &[StreamDownload],
    output: P,
    ffmpeg: &str,
) -> Result<()> {
    let output_path: PathBuf = output.into();

    // Two arguments per input, two per mapping, plus "-c copy" and the output path
    let mut args: Vec<OsString> = Vec::with_capacity(downloads.len() * 4 + 3);

    // All inputs first ...
    for d in downloads {
        args.push("-i".into());
        args.push(d.file.clone().into());
    }
    // ... followed by the mapping of every input to the output
    for i in 0..downloads.len() {
        args.push("-map".into());
        args.push(i.to_string().into());
    }

    // Combining multiple streams, keep codecs
    if downloads.len() > 1 {
        args.push("-c".into());
        args.push("copy".into());
    }

    args.push(output_path.into());

    let res = Command::new(ffmpeg).args(args).output().await?;

    if !res.status.success() {
        // Lossy conversion: keep the error text even if stderr is not valid UTF-8
        return Err(DownloadError::Ffmpeg(
            format!("ffmpeg error: {}", String::from_utf8_lossy(&res.stderr)).into(),
        ));
    }
    Ok(())
}
/*
#[cfg(test)]
mod tests {
use crate::client::RustyTube;
use super::*;
use indicatif::{ProgressDrawTarget, ProgressStyle};
use reqwest::ClientBuilder;
// #[test_log::test(tokio::test)]
#[tokio::test]
async fn t_download_video() {
let http = ClientBuilder::new()
.user_agent(
"Mozilla/5.0 (Windows NT 10.0; Win64; rv:107.0) Gecko/20100101 Firefox/107.0",
)
.gzip(true)
.brotli(true)
.build()
.expect("unable to build the HTTP client");
// Indicatif setup
let pb = ProgressBar::new(0);
let rt = RustyTube::new();
let player_data = rt
.get_player("AbZH7XWDW_k", crate::client::ClientType::Desktop)
.await
.unwrap();
// download_video(&player_data, "tmp", "INVU", Some(1080), "ffmpeg", http, pb)
// .await
// .unwrap();
}
}
*/