fix: reworked retry system

This commit is contained in:
ThetaDev 2023-05-12 17:19:56 +02:00
parent d128ca4214
commit a2bbc850a7
17 changed files with 273 additions and 121 deletions

View file

@ -17,7 +17,6 @@ testyt10:
testintl:
#!/usr/bin/env bash
set -e
LANGUAGES=(
"af" "am" "ar" "as" "az" "be" "bg" "bn" "bs" "ca" "cs" "da" "de" "el"
"en" "en-GB" "en-IN"
@ -27,13 +26,22 @@ testintl:
"pt" "pt-PT" "ro" "ru" "si" "sk" "sl" "sq" "sr" "sr-Latn" "sv" "sw" "ta"
"te" "th" "tr" "uk" "ur" "uz" "vi" "zh-CN" "zh-HK" "zh-TW" "zu"
)
for YT_LANG in "${LANGUAGES[@]}"; do \
echo "---TESTS FOR $YT_LANG ---"; \
YT_LANG="$YT_LANG" cargo test --test youtube -- --skip get_video_details --skip startpage; \
echo "--- $YT_LANG COMPLETED ---"; \
sleep 10; \
# Run the test suite once per language and count the languages that failed,
# so a single failing language does not hide the results of the others.
N_FAILED=0
for YT_LANG in "${LANGUAGES[@]}"; do
  echo "---TESTS FOR $YT_LANG ---"
  if YT_LANG="$YT_LANG" cargo test --test youtube -- --test-threads 4 --skip resolve; then
    echo "--- $YT_LANG COMPLETED ---"
  else
    echo "--- $YT_LANG FAILED ---"
    # Do not use `((N_FAILED++))` here: it evaluates to the old value, so the
    # first increment (0) yields exit status 1 and aborts the script under `set -e`.
    N_FAILED=$((N_FAILED + 1))
  fi
done
# The exit status is the number of failed languages (0 = everything passed)
exit "$N_FAILED"
testfiles:
cargo run -p rustypipe-codegen download-testfiles

View file

@ -7,11 +7,31 @@ edition = "2021"
default = ["rustls-tls-native-roots"]
# Reqwest TLS options
native-tls = ["reqwest/native-tls", "rustypipe/native-tls", "rustypipe-downloader/native-tls"]
native-tls-alpn = ["reqwest/native-tls-alpn", "rustypipe/native-tls-alpn", "rustypipe-downloader/native-tls-alpn"]
native-tls-vendored = ["reqwest/native-tls-vendored", "rustypipe/native-tls-vendored", "rustypipe-downloader/native-tls-vendored"]
rustls-tls-webpki-roots = ["reqwest/rustls-tls-webpki-roots", "rustypipe/rustls-tls-webpki-roots", "rustypipe-downloader/rustls-tls-webpki-roots"]
rustls-tls-native-roots = ["reqwest/rustls-tls-native-roots", "rustypipe/rustls-tls-native-roots", "rustypipe-downloader/rustls-tls-native-roots"]
native-tls = [
"reqwest/native-tls",
"rustypipe/native-tls",
"rustypipe-downloader/native-tls",
]
native-tls-alpn = [
"reqwest/native-tls-alpn",
"rustypipe/native-tls-alpn",
"rustypipe-downloader/native-tls-alpn",
]
native-tls-vendored = [
"reqwest/native-tls-vendored",
"rustypipe/native-tls-vendored",
"rustypipe-downloader/native-tls-vendored",
]
rustls-tls-webpki-roots = [
"reqwest/rustls-tls-webpki-roots",
"rustypipe/rustls-tls-webpki-roots",
"rustypipe-downloader/rustls-tls-webpki-roots",
]
rustls-tls-native-roots = [
"reqwest/rustls-tls-native-roots",
"rustypipe/rustls-tls-native-roots",
"rustypipe-downloader/rustls-tls-native-roots",
]
[dependencies]
rustypipe = { path = "../", default-features = false }

View file

@ -23,7 +23,7 @@ impl RustyPipeQuery {
);
let xml = self
.client
.http_request_txt(self.client.inner.http.get(&url).build()?)
.http_request_txt(&self.client.inner.http.get(&url).build()?)
.await
.map_err(|e| match e {
Error::HttpStatus(404, _) => Error::Extraction(ExtractionError::NotFound {

View file

@ -322,6 +322,14 @@ struct ClientData {
pub version: String,
}
/// Result of an HTTP request that completed on the transport level
///
/// The response may still carry a client/server error status; in that case
/// `res` holds the corresponding error.
struct RequestResult<T> {
/// Result of the deserialization/mapping
res: Result<MapResult<T>, Error>,
/// HTTP status code of the response
status: StatusCode,
/// Response body as text
body: String,
}
impl<T> CacheEntry<T> {
fn get(&self) -> Option<&T> {
match self {
@ -487,8 +495,9 @@ impl RustyPipeBuilder {
/// Set the number of retries for HTTP requests.
///
/// If a HTTP requests fails and retries are enabled,
/// If an HTTP request fails because of a server-side error and retries are enabled,
/// RustyPipe waits 1 second before the next attempt.
///
/// The waiting time is doubled for subsequent attempts (including a bit of
/// random jitter to be less predictable).
///
@ -591,40 +600,43 @@ impl RustyPipe {
}
/// Execute the given http request.
async fn http_request(&self, request: Request) -> Result<Response, reqwest::Error> {
let mut last_res = None;
async fn http_request(&self, request: &Request) -> Result<Response, reqwest::Error> {
let mut last_resp = None;
for n in 0..=self.inner.n_http_retries {
let res = self.inner.http.execute(request.try_clone().unwrap()).await;
let emsg = match &res {
Ok(response) => {
let status = response.status();
// Immediately return in case of success or unrecoverable status code
if status.is_success() || (!status.is_server_error() && status != 429) {
return res;
}
status.to_string()
}
Err(e) => {
// Immediately return in case of unrecoverable error
if !e.is_timeout() && !e.is_connect() {
return res;
}
e.to_string()
}
};
let resp = self
.inner
.http
.execute(request.try_clone().unwrap())
.await?;
let ms = util::retry_delay(n, 1000, 60000, 3);
log::warn!("Retry attempt #{}. Error: {}. Waiting {} ms", n, emsg, ms);
tokio::time::sleep(Duration::from_millis(ms.into())).await;
let status = resp.status();
// Immediately return in case of success or unrecoverable status code
if status.is_success()
|| (!status.is_server_error() && status != StatusCode::TOO_MANY_REQUESTS)
{
return Ok(resp);
}
last_res = Some(res);
// Retry in case of a recoverable status code (server err, too many requests)
if n != self.inner.n_http_retries {
let ms = util::retry_delay(n, 1000, 60000, 3);
log::warn!(
"Retry attempt #{}. Error: {}. Waiting {} ms",
n + 1,
status,
ms
);
tokio::time::sleep(Duration::from_millis(ms.into())).await;
}
last_resp = Some(resp);
}
last_res.unwrap()
Ok(last_resp.unwrap())
}
/// Execute the given http request, returning an error in case of a
/// non-successful status code.
async fn http_request_estatus(&self, request: Request) -> Result<Response, Error> {
async fn http_request_estatus(&self, request: &Request) -> Result<Response, Error> {
let res = self.http_request(request).await?;
let status = res.status();
@ -636,7 +648,7 @@ impl RustyPipe {
}
/// Execute the given http request, returning the response body as a string.
async fn http_request_txt(&self, request: Request) -> Result<String, Error> {
async fn http_request_txt(&self, request: &Request) -> Result<String, Error> {
Ok(self.http_request_estatus(request).await?.text().await?)
}
@ -672,7 +684,8 @@ impl RustyPipe {
let from_swjs = sw_url.map(|sw_url| async move {
let swjs = self
.http_request_txt(
self.inner
&self
.inner
.http
.get(sw_url)
.header(header::ORIGIN, origin)
@ -696,7 +709,7 @@ impl RustyPipe {
builder = builder.header(header::USER_AGENT, ua);
}
let html = self.http_request_txt(builder.build().unwrap()).await?;
let html = self.http_request_txt(&builder.build().unwrap()).await?;
util::get_cg_from_regexes(CLIENT_VERSION_REGEXES.iter(), &html, 1).ok_or(
Error::Extraction(ExtractionError::InvalidData(Cow::Borrowed(
@ -1069,6 +1082,85 @@ impl RustyPipeQuery {
}
}
async fn yt_request_attempt<R: DeserializeOwned + MapResponse<M> + Debug, M>(
&self,
request: &Request,
id: &str,
deobf: Option<&DeobfData>,
) -> Result<RequestResult<M>, Error> {
let response = self
.client
.inner
.http
.execute(request.try_clone().unwrap())
.await?;
let status = response.status();
let body = response.text().await?;
let res = if status.is_client_error() || status.is_server_error() {
let error_msg = serde_json::from_str::<response::ErrorResponse>(&body)
.map(|r| Cow::from(r.error.message));
Err(match status {
StatusCode::NOT_FOUND => Error::Extraction(ExtractionError::NotFound {
id: id.to_owned(),
msg: error_msg.unwrap_or("404".into()),
}),
StatusCode::BAD_REQUEST => {
Error::Extraction(ExtractionError::BadRequest(error_msg.unwrap_or_default()))
}
_ => Error::HttpStatus(status.as_u16(), error_msg.unwrap_or_default()),
})
} else {
match serde_json::from_str::<R>(&body) {
Ok(deserialized) => match deserialized.map_response(id, self.opts.lang, deobf) {
Ok(mapres) => Ok(mapres),
Err(e) => Err(e.into()),
},
Err(e) => Err(Error::from(ExtractionError::from(e))),
}
};
Ok(RequestResult { res, status, body })
}
/// Send the request, retrying it while the resulting error is marked as retryable.
///
/// At most `n_http_retries` retries are made after the first attempt, with a
/// delay from `util::retry_delay` before each retry. The result of the last attempt
/// is returned, even if it still contains an error. Transport errors
/// abort immediately without a retry.
async fn yt_request<R: DeserializeOwned + MapResponse<M> + Debug, M>(
    &self,
    request: &Request,
    id: &str,
    deobf: Option<&DeobfData>,
) -> Result<RequestResult<M>, Error> {
    let max_retries = self.client.inner.n_http_retries;
    let mut attempt = 0;

    loop {
        let outcome = self.yt_request_attempt::<R, M>(request, id, deobf).await?;

        // Done on success, on a non-retryable error or when the retries are used up
        let retryable = matches!(&outcome.res, Err(e) if e.should_retry());
        if !retryable || attempt == max_retries {
            return Ok(outcome);
        }

        let ms = util::retry_delay(attempt, 1000, 60000, 3);
        if let Err(err) = &outcome.res {
            log::warn!(
                "Retry attempt #{}. Error: {}. Waiting {} ms",
                attempt + 1,
                err,
                ms
            );
        }
        tokio::time::sleep(Duration::from_millis(ms.into())).await;

        attempt += 1;
    }
}
/// Execute a request to the YouTube API, then deobfuscate and map the response.
///
/// Creates a report in case of failure for easy debugging.
@ -1104,18 +1196,31 @@ impl RustyPipeQuery {
.json(body)
.build()?;
let request_url = request.url().to_string();
let request_headers = request.headers().to_owned();
let response = self.client.http_request(request).await?;
let status = response.status();
let resp_str = response.text().await?;
let req_res = self.yt_request::<R, M>(&request, id, deobf).await?;
// Uncomment to debug response text
// println!("{}", &resp_str);
// println!("{}", &req_res.body);
let create_report = |level: Level, error: Option<String>, msgs: Vec<String>| {
let (level, error, msgs, res) = match req_res.res {
Ok(mapres) => {
let level = if mapres.warnings.is_empty() {
Level::DBG
} else {
Level::WRN
};
(level, None, mapres.warnings, Ok(mapres.c))
}
Err(e) => {
let level = if e.should_report() {
Level::ERR
} else {
Level::DBG
};
(level, Some(e.to_string()), Vec::new(), Err(e))
}
};
if level > Level::DBG || self.opts.report {
if let Some(reporter) = &self.client.inner.reporter {
let report = Report {
info: Default::default(),
@ -1125,75 +1230,29 @@ impl RustyPipeQuery {
msgs,
deobf_data: deobf.cloned(),
http_request: crate::report::HTTPRequest {
url: request_url,
url: request.url().to_string(),
method: "POST".to_string(),
req_header: request_headers
req_header: request
.headers()
.iter()
.map(|(k, v)| {
(k.to_string(), v.to_str().unwrap_or_default().to_owned())
})
.collect(),
req_body: serde_json::to_string(body).unwrap_or_default(),
status: status.into(),
resp_body: resp_str.to_owned(),
status: req_res.status.into(),
resp_body: req_res.body,
},
};
reporter.report(&report);
}
};
if status.is_client_error() || status.is_server_error() {
let error_msg = serde_json::from_str::<response::ErrorResponse>(&resp_str)
.map(|r| Cow::from(r.error.message));
return match status {
StatusCode::NOT_FOUND => Err(Error::Extraction(ExtractionError::NotFound {
id: id.to_owned(),
msg: error_msg.unwrap_or("404".into()),
})),
StatusCode::BAD_REQUEST => Err(Error::Extraction(ExtractionError::BadRequest(
error_msg.unwrap_or_default(),
))),
_ => Err(Error::HttpStatus(
status.as_u16(),
error_msg.unwrap_or_default(),
)),
};
}
match serde_json::from_str::<R>(&resp_str) {
Ok(deserialized) => match deserialized.map_response(id, self.opts.lang, deobf) {
Ok(mapres) => {
if !mapres.warnings.is_empty() {
create_report(
Level::WRN,
Some(ExtractionError::DeserializationWarnings.to_string()),
mapres.warnings,
);
if self.opts.strict {
return Err(Error::Extraction(
ExtractionError::DeserializationWarnings,
));
}
} else if self.opts.report {
create_report(Level::DBG, None, vec![]);
}
Ok(mapres.c)
}
Err(e) => {
if e.should_report() || self.opts.report {
create_report(Level::ERR, Some(e.to_string()), Vec::new());
}
Err(e.into())
}
},
Err(e) => {
create_report(Level::ERR, Some(e.to_string()), Vec::new());
Err(Error::from(ExtractionError::from(e)))
}
if res.is_ok() && level > Level::DBG && self.opts.strict {
return Err(Error::Extraction(ExtractionError::DeserializationWarnings));
}
res
}
/// Execute a request to the YouTube API, then map the response.
@ -1238,7 +1297,7 @@ impl RustyPipeQuery {
.json(body)
.build()?;
self.client.http_request_txt(request).await
self.client.http_request_txt(&request).await
}
}

View file

@ -269,6 +269,7 @@ fn map_artist_page(
}
}
mapper.check_unknown()?;
let mut mapped = mapper.group_items();
static WIKIPEDIA_REGEX: Lazy<Regex> =
@ -355,6 +356,7 @@ impl MapResponse<Vec<AlbumItem>> for response::MusicArtistAlbums {
mapper.map_response(grid.grid_renderer.items);
}
mapper.check_unknown()?;
let mapped = mapper.group_items();
Ok(MapResult {

View file

@ -118,6 +118,10 @@ impl MapResponse<MusicCharts> for response::MusicCharts {
response::music_charts::ItemSection::None => {}
});
mapper_top.check_unknown()?;
mapper_trending.check_unknown()?;
mapper_other.check_unknown()?;
let mapped_top = mapper_top.conv_items::<TrackItem>();
let mut mapped_trending = mapper_trending.conv_items::<TrackItem>();
let mut mapped_other = mapper_other.group_items();

View file

@ -380,6 +380,9 @@ impl MapResponse<MusicRelated> for response::MusicRelated {
_ => {}
});
mapper.check_unknown()?;
mapper_tracks.check_unknown()?;
let mapped_tracks = mapper_tracks.conv_items();
let mut mapped = mapper.group_items();

View file

@ -72,6 +72,7 @@ impl<T: FromYtItem> MapResponse<Vec<T>> for response::MusicNew {
let mut mapper = MusicListMapper::new(lang);
mapper.map_response(items);
mapper.check_unknown()?;
Ok(mapper.conv_items())
}

View file

@ -156,6 +156,7 @@ impl MapResponse<MusicPlaylist> for response::MusicPlaylist {
let mut mapper = MusicListMapper::new(lang);
mapper.map_response(shelf.contents);
mapper.check_unknown()?;
let map_res = mapper.conv_items();
let ctoken = shelf

View file

@ -272,6 +272,7 @@ impl MapResponse<MusicSearchResult> for response::MusicSearch {
response::music_search::ItemSection::None => {}
});
mapper.check_unknown()?;
let map_res = mapper.group_items();
Ok(MapResult {
@ -329,6 +330,7 @@ impl<T: FromYtItem> MapResponse<MusicSearchFiltered<T>> for response::MusicSearc
response::music_search::ItemSection::None => {}
});
mapper.check_unknown()?;
let map_res = mapper.conv_items();
Ok(MapResult {
@ -373,6 +375,7 @@ impl MapResponse<MusicSearchSuggestion> for response::MusicSearchSuggestion {
}
}
mapper.check_unknown()?;
let map_res = mapper.conv_items();
Ok(MapResult {

View file

@ -2,6 +2,7 @@ use serde::Deserialize;
use serde_with::{rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError};
use crate::{
error::ExtractionError,
model::{
self, traits::FromYtItem, AlbumId, AlbumItem, AlbumType, ArtistId, ArtistItem, ChannelId,
MusicItem, MusicItemType, MusicPlaylistItem, TrackItem,
@ -428,6 +429,8 @@ pub(crate) struct MusicListMapper {
artist_page: bool,
items: Vec<MusicItem>,
warnings: Vec<String>,
/// True if unknown items were mapped
has_unknown: bool,
}
#[derive(Debug)]
@ -447,6 +450,7 @@ impl MusicListMapper {
artist_page: false,
items: Vec::new(),
warnings: Vec::new(),
has_unknown: false,
}
}
@ -459,6 +463,7 @@ impl MusicListMapper {
artist_page: true,
items: Vec::new(),
warnings: Vec::new(),
has_unknown: false,
}
}
@ -471,6 +476,7 @@ impl MusicListMapper {
artist_page: false,
items: Vec::new(),
warnings: Vec::new(),
has_unknown: false,
}
}
@ -759,6 +765,10 @@ impl MusicListMapper {
}
// Tracks were already handled above
MusicPageType::Track { .. } => unreachable!(),
MusicPageType::Unknown => {
self.has_unknown = true;
Ok(None)
}
}
}
None => {
@ -893,6 +903,10 @@ impl MusicListMapper {
Ok(Some(MusicItemType::Playlist))
}
MusicPageType::None => Ok(None),
MusicPageType::Unknown => {
self.has_unknown = true;
Ok(None)
}
},
None => Err("could not determine item type".to_owned()),
}
@ -1028,6 +1042,10 @@ impl MusicListMapper {
Some(MusicItemType::Playlist)
}
MusicPageType::None => None,
MusicPageType::Unknown => {
self.has_unknown = true;
None
}
},
None => {
self.warnings
@ -1102,6 +1120,13 @@ impl MusicListMapper {
warnings: self.warnings,
}
}
/// Return an `InvalidData` error if the `has_unknown` flag has been set
/// while mapping items, `Ok(())` otherwise.
pub fn check_unknown(&self) -> Result<(), ExtractionError> {
    if self.has_unknown {
        return Err(ExtractionError::InvalidData("unknown YTM items".into()));
    }
    Ok(())
}
}
/// Map TextComponents containing artist names to a list of artists and a 'Various Artists' flag

View file

@ -160,15 +160,18 @@ pub(crate) enum PageType {
Channel,
#[serde(rename = "MUSIC_PAGE_TYPE_PLAYLIST", alias = "WEB_PAGE_TYPE_PLAYLIST")]
Playlist,
#[serde(rename = "MUSIC_PAGE_TYPE_UNKNOWN")]
Unknown,
}
impl PageType {
pub(crate) fn to_url_target(self, id: String) -> UrlTarget {
pub(crate) fn to_url_target(self, id: String) -> Option<UrlTarget> {
match self {
PageType::Artist => UrlTarget::Channel { id },
PageType::Album => UrlTarget::Album { id },
PageType::Channel => UrlTarget::Channel { id },
PageType::Playlist => UrlTarget::Playlist { id },
PageType::Artist => Some(UrlTarget::Channel { id }),
PageType::Album => Some(UrlTarget::Album { id }),
PageType::Channel => Some(UrlTarget::Channel { id }),
PageType::Playlist => Some(UrlTarget::Playlist { id }),
PageType::Unknown => None,
}
}
}
@ -179,6 +182,7 @@ pub(crate) enum MusicPageType {
Album,
Playlist,
Track { is_video: bool },
Unknown,
None,
}
@ -189,6 +193,7 @@ impl From<PageType> for MusicPageType {
PageType::Album => MusicPageType::Album,
PageType::Playlist => MusicPageType::Playlist,
PageType::Channel => MusicPageType::None,
PageType::Unknown => MusicPageType::Unknown,
}
}
}

View file

@ -76,7 +76,7 @@ impl RustyPipeQuery {
let response = self
.client
.http_request_txt(self.client.inner.http.get(url).build()?)
.http_request_txt(&self.client.inner.http.get(url).build()?)
.await?;
let parsed = serde_json::from_str::<response::SearchSuggestion>(&response)

View file

@ -314,7 +314,7 @@ impl MapResponse<UrlTarget> for response::ResolvedUrl {
.browse_endpoint
.ok_or(ExtractionError::InvalidData(Cow::Borrowed("No browse ID")))?;
let page_type = self
let target = self
.endpoint
.command_metadata
.map(|c| c.web_command_metadata.web_page_type)
@ -323,10 +323,11 @@ impl MapResponse<UrlTarget> for response::ResolvedUrl {
.browse_endpoint_context_supported_configs
.map(|c| c.browse_endpoint_context_music_config.page_type)
})
.and_then(|pt| pt.to_url_target(browse_endpoint.browse_id))
.ok_or(ExtractionError::InvalidData(Cow::Borrowed("No page type")))?;
Ok(MapResult {
c: page_type.to_url_target(browse_endpoint.browse_id),
c: target,
warnings: Vec::new(),
})
}

View file

@ -2,6 +2,8 @@
use std::{borrow::Cow, fmt::Display};
use reqwest::StatusCode;
/// Error type for the RustyPipe library
#[derive(thiserror::Error, Debug)]
#[non_exhaustive]
@ -177,14 +179,32 @@ impl From<serde_plain::Error> for Error {
}
}
impl ExtractionError {
impl Error {
/// Return true if a report should be generated
pub(crate) fn should_report(&self) -> bool {
matches!(
self,
ExtractionError::InvalidData(_) | ExtractionError::WrongResult(_)
Self::HttpStatus(_, _)
| Self::Extraction(ExtractionError::InvalidData(_))
| Self::Extraction(ExtractionError::WrongResult(_))
)
}
/// Return true if the request should be retried
///
/// This is the case for HTTP server errors (5xx), HTTP 429 (too many requests)
/// and for responses that could not be extracted because of invalid data.
pub(crate) fn should_retry(&self) -> bool {
    if let Self::HttpStatus(code, _) = self {
        // Codes that are not valid HTTP status codes are never retried
        return StatusCode::try_from(*code).map_or(false, |status| {
            status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()
        });
    }
    matches!(self, Self::Extraction(ExtractionError::InvalidData(_)))
}
}
impl ExtractionError {
/// Return true if the video should be fetched with a different client
pub(crate) fn switch_client(&self) -> bool {
matches!(
self,

View file

@ -384,9 +384,9 @@ impl From<TextComponent> for crate::model::richtext::TextComponent {
text,
page_type,
browse_id,
} => Self::YouTube {
text,
target: page_type.to_url_target(browse_id),
} => match page_type.to_url_target(browse_id) {
Some(target) => Self::YouTube { text, target },
None => Self::Text(text),
},
TextComponent::Web { text, url } => Self::Web {
text,

View file

@ -1254,7 +1254,7 @@ fn startpage(rp: RustyPipe) {
// The startpage requires visitor data to fetch continuations
assert!(startpage.visitor_data.is_some());
assert_next(startpage, rp.query(), 12, 2);
assert_next(startpage, rp.query(), 8, 2);
}
#[rstest]