Replace hand-rolled urlencoded_decode with url::form_urlencoded::parse
The previous decoder treated each %XX as an isolated code point via `out.push(v as char)`. For UTF-8 multi-byte sequences (e.g. %E2%9C%93 for ✓) that produced three garbage chars at U+00E2 / U+009C / U+0093 instead of the proper U+2713. YT cipher strings are typically ASCII-only so this was latent, but the function was named generically and nothing in the type system prevented a non-ASCII input from reaching it. `url::form_urlencoded::parse` is the canonical &-separated query-pair parser — it handles %-decode as UTF-8, handles + → space, and the url crate is already a transitive dep. parse_cipher_string collapses to a single expression; the bespoke 20-line decoder goes.
This commit is contained in:
parent
1292688827
commit
75bc7dc6bf
1 changed files with 10 additions and 34 deletions
|
|
@ -558,40 +558,16 @@ fn process_url(
|
|||
}
|
||||
|
||||
fn parse_cipher_string(s: &str) -> std::collections::BTreeMap<String, String> {
|
||||
let mut out = std::collections::BTreeMap::new();
|
||||
for pair in s.split('&') {
|
||||
if let Some((k, v)) = pair.split_once('=') {
|
||||
out.insert(
|
||||
urlencoded_decode(k),
|
||||
urlencoded_decode(v),
|
||||
);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn urlencoded_decode(s: &str) -> String {
|
||||
let mut out = String::with_capacity(s.len());
|
||||
let bytes = s.as_bytes();
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
let b = bytes[i];
|
||||
if b == b'%' && i + 2 < bytes.len() {
|
||||
let hex = std::str::from_utf8(&bytes[i + 1..i + 3]).unwrap_or("");
|
||||
if let Ok(v) = u8::from_str_radix(hex, 16) {
|
||||
out.push(v as char);
|
||||
i += 3;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if b == b'+' {
|
||||
out.push(' ');
|
||||
} else {
|
||||
out.push(b as char);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
out
|
||||
// `url::form_urlencoded::parse` decodes percent-escapes as UTF-8
|
||||
// multi-byte sequences and maps `+` → space. The prior hand-rolled
|
||||
// `urlencoded_decode` handled `+` but got UTF-8 wrong (it treated each
|
||||
// %XX as an isolated code point, so `%E2%9C%93` rendered as three
|
||||
// garbage chars instead of ✓). YT cipher strings are typically
|
||||
// ASCII-only, but pulling in the canonical parser closes the
|
||||
// surface and removes 20 lines.
|
||||
url::form_urlencoded::parse(s.as_bytes())
|
||||
.map(|(k, v)| (k.into_owned(), v.into_owned()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_video_progressive(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue