fix: Wikipedia URL extraction for URLs containing parentheses

This commit is contained in:
ThetaDev 2023-02-09 14:06:03 +01:00
parent 055d266809
commit e22f1e7cd5
2 changed files with 12 additions and 6 deletions

View file

@ -272,12 +272,18 @@ fn map_artist_page(
let mapped = mapper.group_items();
static WIKIPEDIA_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"https://[a-z]+\.wikipedia.org/wiki/[^()\s]+").unwrap());
Lazy::new(|| Regex::new(r"\(?https://[a-z\d-]+\.wikipedia.org/wiki/[^\s]+").unwrap());
let wikipedia_url = header.description.as_deref().and_then(|h| {
WIKIPEDIA_REGEX
.captures(h)
.and_then(|c| c.get(0))
.map(|m| m.as_str().to_owned())
WIKIPEDIA_REGEX.captures(h).and_then(|c| c.get(0)).map(|m| {
let m = m.as_str();
match m.strip_prefix('(') {
Some(m) => match m.strip_suffix(')') {
Some(m) => m.to_owned(),
None => m.to_owned(),
},
None => m.to_owned(),
}
})
});
let radio_id = header.start_radio_button.and_then(|b| {

View file

@ -33,7 +33,7 @@ MusicArtist(
),
],
description: Some("Senta-Sofia Delliponti is a German singer, songwriter and actress. Since January 2014, she used the stage name Oonagh, until she changed it to Senta in 2022. Her signature musical style is inspired by the mystical lore of J. R. R. Tolkien\'s universe and by ethnic sounds throughout the world.\n\nFrom Wikipedia (https://en.wikipedia.org/wiki/Oonagh_(singer)) under Creative Commons Attribution CC-BY-SA 3.0 (http://creativecommons.org/licenses/by-sa/3.0/legalcode)"),
wikipedia_url: Some("https://en.wikipedia.org/wiki/Oonagh_"),
wikipedia_url: Some("https://en.wikipedia.org/wiki/Oonagh_(singer)"),
subscriber_count: Some(34200),
tracks: [
TrackItem(