From 75bc7dc6bf931075395b73154e93b4fae3cfb0d2 Mon Sep 17 00:00:00 2001
From: Kayos <kayos@sulkta.com>
Date: Tue, 26 May 2026 22:52:27 -0700
Subject: [PATCH] Replace hand-rolled urlencoded_decode with
 url::form_urlencoded::parse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous decoder treated each %XX as an isolated code point via
`out.push(v as char)`. For UTF-8 multi-byte sequences (e.g. %E2%9C%93
for ✓) that produced three garbage chars at U+00E2 / U+009C / U+0093
instead of the proper U+2713. YT cipher strings are typically ASCII-
only so this was latent, but the function was named generically and
nothing in the type system prevented a non-ASCII input from reaching it.

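`url::form_urlencoded::parse` is the canonical &-separated query-pair
parser — handles %-decode as UTF-8, handles + → space, and the url
crate is already a transitive dep. parse_cipher_string collapses to
a single expression; the bespoke 20-line decoder goes.

Behavior note: the old loop skipped any segment without an `=`; the
new parser keeps such a segment as a key with an empty value.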
---
 src/youtube/stream_extractor.rs | 44 ++++++++-------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/src/youtube/stream_extractor.rs b/src/youtube/stream_extractor.rs
index 4840787..ced32a6 100644
--- a/src/youtube/stream_extractor.rs
+++ b/src/youtube/stream_extractor.rs
@@ -558,40 +558,16 @@ fn process_url(
 }
 
 fn parse_cipher_string(s: &str) -> std::collections::BTreeMap<String, String> {
-    let mut out = std::collections::BTreeMap::new();
-    for pair in s.split('&') {
-        if let Some((k, v)) = pair.split_once('=') {
-            out.insert(
-                urlencoded_decode(k),
-                urlencoded_decode(v),
-            );
-        }
-    }
-    out
-}
-
-fn urlencoded_decode(s: &str) -> String {
-    let mut out = String::with_capacity(s.len());
-    let bytes = s.as_bytes();
-    let mut i = 0;
-    while i < bytes.len() {
-        let b = bytes[i];
-        if b == b'%' && i + 2 < bytes.len() {
-            let hex = std::str::from_utf8(&bytes[i + 1..i + 3]).unwrap_or("");
-            if let Ok(v) = u8::from_str_radix(hex, 16) {
-                out.push(v as char);
-                i += 3;
-                continue;
-            }
-        }
-        if b == b'+' {
-            out.push(' ');
-        } else {
-            out.push(b as char);
-        }
-        i += 1;
-    }
-    out
+    // `url::form_urlencoded::parse` decodes percent-escapes as UTF-8
+    // multi-byte sequences and maps `+` → space. The prior hand-rolled
+    // `urlencoded_decode` also mapped `+` → space, but it pushed each
+    // %XX byte as an isolated code point, so `%E2%9C%93` rendered as
+    // three garbage chars instead of ✓. YT cipher strings are typically
+    // ASCII-only, but pulling in the canonical parser closes the
+    // surface and removes 20 lines.
+    url::form_urlencoded::parse(s.as_bytes())
+        .map(|(k, v)| (k.into_owned(), v.into_owned()))
+        .collect()
 }
 
 fn build_video_progressive(