add downloader
This commit is contained in:
parent
f706d875f6
commit
4600edc81f
4 changed files with 242 additions and 4 deletions
|
|
@ -13,15 +13,16 @@ anyhow = "1.0"
|
|||
thiserror = "1.0.31"
|
||||
url = "2.2.2"
|
||||
log = "0.4.17"
|
||||
reqwest = {version = "0.11.11", features = ["json", "gzip", "brotli"]}
|
||||
tokio = {version = "1.20.0", features = ["macros"]}
|
||||
reqwest = {version = "0.11.11", features = ["json", "gzip", "brotli", "stream"]}
|
||||
tokio = {version = "1.20.0", features = ["macros", "fs", "process"]}
|
||||
serde_json = "1.0.82"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = {version = "2.0.0", features = ["json"] }
|
||||
rand = "0.8.5"
|
||||
async-trait = "0.1.56"
|
||||
chrono = {version = "0.4.19", features = ["serde"]}
|
||||
trie = "0.2.1"
|
||||
futures = "0.3.21"
|
||||
indicatif = "0.17.0"
|
||||
|
||||
[dev-dependencies]
|
||||
env_logger = "0.9.0"
|
||||
|
|
|
|||
|
|
@ -420,7 +420,7 @@ impl YTClient for IosClient {
|
|||
"{}{}?key={}{}",
|
||||
YOUTUBEI_V1_GAPIS_URL,
|
||||
endpoint,
|
||||
ANDROID_API_KEY,
|
||||
IOS_API_KEY,
|
||||
DISABLE_PRETTY_PRINT_PARAMETER
|
||||
),
|
||||
)
|
||||
|
|
|
|||
236
src/download.rs
Normal file
236
src/download.rs
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
use std::{cmp::Ordering, ops::Range, path::PathBuf};
|
||||
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use fancy_regex::Regex;
|
||||
use futures::stream::StreamExt;
|
||||
use indicatif::ProgressBar;
|
||||
use log::debug;
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::Rng;
|
||||
use reqwest::{header, Client};
|
||||
use tokio::{fs, io::AsyncWriteExt, process::Command};
|
||||
|
||||
/// Inclusive lower bound, in bytes, of the random length added to the offset when the end
/// of the next range request is chosen.
const CHUNK_SIZE_MIN: u64 = 9_000_000;
/// Exclusive upper bound, in bytes, of that random length.
const CHUNK_SIZE_MAX: u64 = 11_000_000;
|
||||
|
||||
fn get_download_range(offset: u64, size: Option<u64>) -> Range<u64> {
|
||||
let mut rng = rand::thread_rng();
|
||||
let chunk_size = rng.gen_range(CHUNK_SIZE_MIN..CHUNK_SIZE_MAX);
|
||||
let mut chunk_end = offset + chunk_size;
|
||||
|
||||
if size.is_some() {
|
||||
chunk_end = chunk_end.min(size.unwrap() - 1)
|
||||
}
|
||||
|
||||
Range {
|
||||
start: offset,
|
||||
end: chunk_end,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_cr_header(cr_header: &str) -> Result<(u64, u64)> {
|
||||
static PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r#"bytes (\d+)-(\d+)/(\d+)"#).unwrap());
|
||||
|
||||
let captures = some_or_bail!(
|
||||
PATTERN.captures(&cr_header).ok().flatten(),
|
||||
Err(anyhow!(
|
||||
"Content-Range header '{}' does not match pattern.",
|
||||
cr_header
|
||||
))
|
||||
);
|
||||
|
||||
Ok((
|
||||
captures.get(2).unwrap().as_str().parse()?,
|
||||
captures.get(3).unwrap().as_str().parse()?,
|
||||
))
|
||||
}
|
||||
|
||||
async fn download_single_file(
|
||||
url: &str,
|
||||
output: &str,
|
||||
http: Client,
|
||||
pb: ProgressBar,
|
||||
) -> Result<()> {
|
||||
// Check if file is already downloaded
|
||||
let output_path = PathBuf::from(output);
|
||||
if output_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let output_path_tmp = PathBuf::from(output.to_owned() + ".part");
|
||||
let mut offset: u64 = 0;
|
||||
let mut size: Option<u64> = None;
|
||||
|
||||
// Check if file is partially downloaded
|
||||
if output_path_tmp.exists() {
|
||||
let file_size = output_path_tmp.metadata()?.len();
|
||||
|
||||
let res = http
|
||||
.head(url)
|
||||
.header(header::RANGE, "bytes=0-0")
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?;
|
||||
|
||||
let cr_header = some_or_bail!(
|
||||
res.headers().get(header::CONTENT_RANGE),
|
||||
Err(anyhow!("Did not get Content-Range header"))
|
||||
)
|
||||
.to_str()?;
|
||||
|
||||
let (_, original_size) = parse_cr_header(cr_header)?;
|
||||
|
||||
match file_size.cmp(&original_size) {
|
||||
Ordering::Less => {
|
||||
// Partially downloaded
|
||||
size = Some(original_size);
|
||||
offset = file_size;
|
||||
|
||||
pb.inc_length(original_size);
|
||||
pb.inc(offset);
|
||||
}
|
||||
Ordering::Equal => {
|
||||
// Already downloaded
|
||||
fs::rename(output_path_tmp, output_path).await?;
|
||||
return Ok(());
|
||||
}
|
||||
Ordering::Greater => {
|
||||
// WTF?
|
||||
return Err(anyhow!(
|
||||
"Already downloaded file {} is larger than original",
|
||||
output_path_tmp.to_str().unwrap_or_default()
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut file = fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.create(true)
|
||||
.open(output_path_tmp.to_owned())
|
||||
.await?;
|
||||
|
||||
loop {
|
||||
let range = get_download_range(offset, size);
|
||||
debug!("Fetching range {}-{}", range.start, range.end);
|
||||
|
||||
let res = http
|
||||
.get(url)
|
||||
.header(header::ORIGIN, "https://www.youtube.com")
|
||||
.header(header::REFERER, "https://www.youtube.com/")
|
||||
.header(
|
||||
header::RANGE,
|
||||
format!("bytes={}-{}", range.start, range.end),
|
||||
)
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?;
|
||||
|
||||
// Content-Range: bytes 0-100/451368980
|
||||
let cr_header = some_or_bail!(
|
||||
res.headers().get(header::CONTENT_RANGE),
|
||||
Err(anyhow!("Did not get Content-Range header"))
|
||||
)
|
||||
.to_str()?;
|
||||
|
||||
let (parsed_offset, parsed_size) = parse_cr_header(cr_header)?;
|
||||
|
||||
offset = parsed_offset;
|
||||
if size.is_none() {
|
||||
size = Some(parsed_size);
|
||||
pb.inc_length(parsed_size);
|
||||
}
|
||||
|
||||
debug!("Retrieving chunks...");
|
||||
let mut stream = res.bytes_stream();
|
||||
while let Some(item) = stream.next().await {
|
||||
// Retrieve chunk.
|
||||
let mut chunk = item?;
|
||||
pb.inc(chunk.len() as u64);
|
||||
file.write_all_buf(&mut chunk).await?;
|
||||
}
|
||||
|
||||
if offset >= size.unwrap() - 1 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fs::rename(output_path_tmp, output_path).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ffmpeg -i video.webm -i audio.webm -c copy output.mp4
|
||||
async fn join_video_audio(
|
||||
video_file: &str,
|
||||
audio_file: &str,
|
||||
output_file: &str,
|
||||
ffmpeg: &str,
|
||||
) -> Result<()> {
|
||||
let res = Command::new(ffmpeg)
|
||||
.args([
|
||||
"-i",
|
||||
video_file,
|
||||
"-i",
|
||||
audio_file,
|
||||
"-c",
|
||||
"copy",
|
||||
output_file,
|
||||
])
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !res.status.success() {
|
||||
bail!(
|
||||
"ffmpeg error: {}",
|
||||
std::str::from_utf8(&res.stderr).unwrap_or_default()
|
||||
)
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use indicatif::ProgressStyle;
    use reqwest::ClientBuilder;

    // NOTE(review): both URLs carry an `expire=` query parameter, so they presumably stop
    // working after that point in time - confirm before relying on this test.
    const TEST_URL_AUDIO: &str = "https://rr5---sn-h0jeenl6.googlevideo.com/videoplayback?c=WEB&clen=3781277&dur=229.301&ei=Wd3oYqnIHLKYx_APgPCeoAM&expire=1659449785&fexp=24001373%2C24007246&fvip=5&gir=yes&id=o-AH9zSQFJUzAo61SdF1m4PUcknacuL35Mm8TgOmD5lfwF&initcwndbps=1597500&ip=2003%3Ade%3Aaf0e%3A2f00%3Ade47%3A297%3Aa6db%3A774e&itag=251&keepalive=yes&lmt=1655510291473933&lsig=AG3C_xAwRgIhAOPc6qa-C6x1GOFxx5hpiP_ZFFeCAdHSr43mq4PujcasAiEA8NHcpNsurS187Gjg1WseiaQ_kslkKWU4fylIVGr4p8Y%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&mh=hH&mime=audio%2Fwebm&mm=31%2C26&mn=sn-h0jeenl6%2Csn-4g5ednsl&ms=au%2Conr&mt=1659427257&mv=m&mvi=5&n=cRL0RZUaCeszsQ&ns=1UbvTJx8sEFT4vlb0jQyd68H&pl=37&requiressl=yes&sig=AOq0QJ8wRQIgRY8UR_GHs7T2ZX-0g6vRzvQS5MqpAMOs3sBpPthEzMUCIQDkh7aZOGpgzy82ha2CN2yiYS9NVHBd5WGa1e3K8GYKKg%3D%3D&source=youtube&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cspc%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&spc=lT-Khs9YQMiuc_CePC7R74ycrwd1hNk&txp=4532434&vprv=1";
    const TEST_URL_VIDEO: &str = "https://rr5---sn-h0jeenl6.googlevideo.com/videoplayback?aitags=133%2C134%2C135%2C136%2C137%2C160%2C242%2C243%2C244%2C247%2C248%2C271%2C278%2C313%2C394%2C395%2C396%2C397%2C398%2C399%2C400%2C401&c=WEB&clen=66413039&dur=229.270&ei=Wd3oYqnIHLKYx_APgPCeoAM&expire=1659449785&fexp=24001373%2C24007246&fvip=5&gir=yes&id=o-AH9zSQFJUzAo61SdF1m4PUcknacuL35Mm8TgOmD5lfwF&initcwndbps=1597500&ip=2003%3Ade%3Aaf0e%3A2f00%3Ade47%3A297%3Aa6db%3A774e&itag=248&keepalive=yes&lmt=1655512874472691&lsig=AG3C_xAwRAIgbmq3hI3VDXrOvENhCotYujpiKaJODqLVq-Il8K9OIwwCIHk-H0SzI4tH1w3TzKnVSbpjghk3AByD9VD75Ywii1F_&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&mh=hH&mime=video%2Fwebm&mm=31%2C26&mn=sn-h0jeenl6%2Csn-4g5ednsl&ms=au%2Conr&mt=1659427257&mv=m&mvi=5&n=cRL0RZUaCeszsQ&ns=1UbvTJx8sEFT4vlb0jQyd68H&pl=37&requiressl=yes&sig=AOq0QJ8wRgIhAOuxn8gnk3FFCPPpEoylYPcLyas52BvyT7DzSAsbmJMIAiEAzUAnieCK31ZVQydfExQ5FSrCGJR3AzcwqgpENBzunjA%3D&source=youtube&sparams=expire%2Cei%2Cip%2Cid%2Caitags%2Csource%2Crequiressl%2Cspc%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&spc=lT-Khs9YQMiuc_CePC7R74ycrwd1hNk&txp=4537434&vprv=1";

    /// Downloads the video and the audio stream concurrently while sharing one progress
    /// bar, then joins both files by invoking the `ffmpeg` executable.
    #[tokio::test]
    async fn test() {
        let client = ClientBuilder::new()
            .user_agent(
                "Mozilla/5.0 (Windows NT 10.0; Win64; rv:107.0) Gecko/20100101 Firefox/107.0",
            )
            .gzip(true)
            .brotli(true)
            .build()
            .expect("unable to build the HTTP client");

        // Indicatif setup
        let bar_style = ProgressStyle::default_bar()
            .template("{msg}\n{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})")
            .unwrap()
            .progress_chars("#>-");
        let progress = ProgressBar::new(0);
        progress.set_style(bar_style);
        progress.set_message("Downloading");

        // Both futures are driven at the same time by `join!`.
        let video_job = download_single_file(
            TEST_URL_VIDEO,
            "tmp/test.webm",
            client.clone(),
            progress.clone(),
        );
        let audio_job =
            download_single_file(TEST_URL_AUDIO, "tmp/test_audio.webm", client, progress);
        let (video_res, audio_res) = tokio::join!(video_job, audio_job);
        video_res.unwrap();
        audio_res.unwrap();

        let joined = join_video_audio(
            "tmp/test.webm",
            "tmp/test_audio.webm",
            "tmp/test.mp4",
            "ffmpeg",
        )
        .await;
        joined.unwrap();
    }
}
|
||||
|
||||
|
|
@ -10,3 +10,4 @@ mod deobfuscate;
|
|||
|
||||
pub mod model;
|
||||
pub mod client;
|
||||
pub mod download;
|
||||
|
|
|
|||
Reference in a new issue