feat: add number_tokens for parsing large nums to dictionary

2022-09-23 15:04:22 +02:00 · 2022-09-23 15:04:22 +02:00 · 5d19259a14
commit 5d19259a14
parent 67ae1eb21d
21 changed files with 5219 additions and 38 deletions
--- a/codegen/src/collect_large_numbers.rs
+++ b/codegen/src/collect_large_numbers.rs
@ -0,0 +1,358 @@
+use std::collections::HashMap;
+use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
+
+use anyhow::{Context, Result};
+use fancy_regex::Regex;
+use futures::{stream, StreamExt};
+use once_cell::sync::Lazy;
+use reqwest::{header, Client};
+use rustypipe::model::{locale::LANGUAGES, Language};
+use serde::Deserialize;
+use serde_with::serde_as;
+use serde_with::VecSkipError;
+
+use crate::util::{self, Text};
+
+/// Collected samples: language -> order of magnitude (see `get_mag`) ->
+/// (abbreviated view count text, exact view count).
+type CollectedNumbers = BTreeMap<Language, BTreeMap<u8, (String, u64)>>;
+
+/// Collect video view count texts in every supported language
+/// and write them to `testfiles/dict/large_number_samples.json`.
+///
+/// YouTube's API outputs the subscriber count of a channel only in an
+/// approximated format (e.g. *880K subscribers*), which varies
+/// by language.
+///
+/// To parse these numbers correctly we need to collect textual numbers
+/// of different orders of magnitude in every language. This script extracts
+/// the view count texts from the most popular videos of different channels.
+///
+/// We extract these instead of subscriber counts because the YouTube API
+/// outputs view counts both in approximated and exact format, so we can use
+/// the exact counts to figure out the tokens.
+///
+/// Up to `concurrency` languages are processed at the same time. For every
+/// language only one sample per order of magnitude is kept (the last one
+/// encountered).
+///
+/// # Panics
+///
+/// Panics if a channel cannot be fetched or if the output file cannot be
+/// created or written.
+pub async fn collect_large_numbers(project_root: &Path, concurrency: usize) {
+    let json_path = project_root.join("testfiles/dict/large_number_samples.json");
+
+    let channels = [
+        "UCq-Fj5jknLsUf-MWSy4_brA", // 10^8 (225M)
+        "UCcdwLMPsaU2ezNSJU1nFoBQ", // 10^7 (60M)
+        "UC6mIxFTvXkWQVEHPsEdflzQ", // 10^6 (1.7M)
+        "UCD0y51PJfvkZNe3y3FR5riw", // 10^5 (125K)
+        "UCNcN0dW43zE0Om3278fjY8A", // 10^4 (27K)
+        "UC0QEucPrn0-Ddi3JBTcs5Kw", // 10^3 (5K)
+        "UCGiJh0NZ52wRhYKYnuZI08Q", // 10^1 (37)
+    ];
+
+    let collected_numbers: CollectedNumbers = stream::iter(LANGUAGES)
+        .map(|lang| async move {
+            let mut entry: BTreeMap<u8, (String, u64)> = BTreeMap::new();
+
+            for (n, ch_id) in channels.iter().enumerate() {
+                let channel = get_channel(ch_id, lang)
+                    .await
+                    // Only build the context string if the request failed
+                    .with_context(|| format!("{}-{}", lang, n))
+                    .unwrap();
+
+                // Key the samples by their order of magnitude; later samples
+                // replace earlier ones of the same magnitude.
+                entry.extend(
+                    channel
+                        .view_counts
+                        .into_iter()
+                        .map(|(num, txt)| (get_mag(num), (txt, num))),
+                );
+
+                println!("collected {}-{}", lang, n);
+            }
+
+            (lang, entry)
+        })
+        .buffer_unordered(concurrency)
+        .collect()
+        .await;
+
+    // serde_json issues many small writes, so buffer them and flush explicitly
+    // to surface write errors instead of losing them on drop.
+    let file = File::create(json_path).unwrap();
+    let mut writer = std::io::BufWriter::new(file);
+    serde_json::to_writer_pretty(&mut writer, &collected_numbers).unwrap();
+    std::io::Write::flush(&mut writer).unwrap();
+}
+
+/// Attempt to parse the numbers collected by `collect-large-numbers`
+/// and write the results to `dictionary.json`.
+///
+/// For every language in the dictionary this derives two values from the
+/// samples in `testfiles/dict/large_number_samples.json`:
+///
+/// - `comma_decimal`: whether the comma is used as the decimal separator
+/// - `number_tokens`: the words/characters that denote an order of
+///   magnitude, mapped to their decimal power
+///
+/// # Panics
+///
+/// Panics if the sample file cannot be read or parsed, if a language has no
+/// samples or no sample containing a `.`/`,` between digits, or if the digits
+/// of a sample cannot be parsed.
+pub fn write_samples_to_dict(project_root: &Path) {
+    let mut json_path = project_root.to_path_buf();
+    json_path.push("testfiles/dict/large_number_samples.json");
+
+    let json_file = File::open(json_path).unwrap();
+    let collected_nums: CollectedNumbers =
+        serde_json::from_reader(BufReader::new(json_file)).unwrap();
+    let mut dict = util::read_dict(project_root);
+    // Copy the keys so the dictionary can be mutated while iterating
+    let langs = dict.keys().map(|k| k.to_owned()).collect::<Vec<_>>();
+
+    // Matches a digit, a `.` or `,` (capture group 1), one to three more digits
+    // and then a non-digit or the end of the string.
+    static POINT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\d(\.|,)\d{1,3}(?:\D|$)").unwrap());
+
+    for lang in langs {
+        let dict_entry = dict.entry(lang).or_default();
+
+        // The language itself plus all languages listed as equivalent to it
+        let mut e_langs = dict_entry.equivalent.clone();
+        e_langs.push(lang);
+
+        // Determine the decimal separator from the first sample of this
+        // language that contains a `.` or `,` between digits.
+        let comma_decimal = collected_nums
+            .get(&lang)
+            .unwrap()
+            .iter()
+            .find_map(|(mag, (txt, _))| {
+                let point = POINT_REGEX
+                    .captures(txt)
+                    .unwrap()
+                    .map(|c| c.get(1).unwrap().as_str());
+
+                if let Some(point) = point {
+                    let num_all = util::parse_numeric::<u64>(txt).unwrap();
+                    // If the number parsed from all digits has the same order of
+                    // magnitude as the actual number, it must be a separator.
+                    // Otherwise it is a decimal point
+                    //
+                    // XOR with `point == ","` is true exactly when the comma is
+                    // the decimal point: either a `.` turned out to be a
+                    // separator or a `,` turned out to be a decimal point.
+                    return Some((get_mag(num_all) == *mag) ^ (point == ","));
+                }
+                None
+            })
+            .unwrap();
+
+        let decimal_point = match comma_decimal {
+            true => ",",
+            false => ".",
+        };
+
+        // Search for tokens
+
+        // This map holds all the tokens we encounter while parsing the language
+        // If a new token is found, it is stored in this map with the derived order of
+        // magnitude.
+        // If the token is found again with a different derived order of magnitude,
+        // its value in the map is set to None.
+        let mut found_tokens: HashMap<String, Option<u8>> = HashMap::new();
+
+        // A derived magnitude of 0 is stored as `None` right away, so such
+        // tokens never end up in the dictionary.
+        let mut insert_token = |token: String, mag: u8| {
+            let found_token = found_tokens.entry(token).or_insert(match mag {
+                0 => None,
+                x => Some(x),
+            });
+
+            if let Some(f) = found_token {
+                if *f != mag {
+                    *found_token = None;
+                }
+            }
+        };
+
+        for lang in e_langs {
+            let entry = collected_nums.get(&lang).unwrap();
+
+            entry.iter().for_each(|(mag, (txt, _))| {
+                // Text without digits, `.`, `,` and zero-width spaces
+                let filtered = util::filter_largenumstr(txt);
+
+                // Split into single characters or whitespace-separated words
+                let tokens: Vec<String> = match dict_entry.by_char {
+                    true => filtered.chars().map(|c| c.to_string()).collect(),
+                    false => filtered.split_whitespace().map(|c| c.to_string()).collect(),
+                };
+
+                // The digits before the decimal point account for part of the
+                // sample's magnitude; the rest has to come from the tokens.
+                // NOTE(review): the `u8` subtraction below underflows if the
+                // leading digits exceed the sample's magnitude.
+                let num_before_point =
+                    util::parse_numeric::<u64>(txt.split(decimal_point).next().unwrap()).unwrap();
+                let mag_before_point = get_mag(num_before_point);
+                let mut mag_remaining = mag - mag_before_point;
+
+                tokens.iter().for_each(|t| {
+                    // These tokens are correct in all languages
+                    // and are used to parse combined prefixes like `1.1K crore` (en-IN)
+                    let known_tmag: u8 = if t.len() == 1 {
+                        match t.as_str() {
+                            "K" | "k" => 3,
+                            "M" => 6,
+                            // 'm' means 10^3 in Catalan, 'B' means 10^3 in Turkish
+                            _ => 0,
+                        }
+                    } else {
+                        0
+                    };
+
+                    // K/M/B
+                    if known_tmag > 0 {
+                        // A known prefix uses up part of the remaining magnitude
+                        mag_remaining = mag_remaining
+                            .checked_sub(known_tmag)
+                            .expect("known magnitude incorrect");
+                    } else {
+                        // Every other token is recorded with whatever magnitude is left
+                        insert_token(t.to_owned(), mag_remaining);
+                    }
+                });
+            });
+        }
+
+        // Insert collected data into dictionary
+        // (tokens whose magnitude ended up as `None` are dropped)
+        dict_entry.number_tokens = found_tokens
+            .into_iter()
+            .filter_map(|(k, v)| v.map(|v| (k, v)))
+            .collect();
+        dict_entry.comma_decimal = comma_decimal;
+    }
+
+    util::write_dict(project_root, &dict);
+}
+
+/// Return the decimal order of magnitude of `n`, i.e. `floor(log10(n))`.
+///
+/// `0` yields `0`. The result is computed with integer arithmetic, so it is
+/// exact for the whole `u64` range (a conversion to `f64` would round values
+/// above 2^53 and could report the next higher magnitude).
+fn get_mag(n: u64) -> u8 {
+    let mut rest = n;
+    let mut mag = 0;
+    while rest >= 10 {
+        rest /= 10;
+        mag += 1;
+    }
+    mag
+}
+
+/*
+YouTube channel videos response
+*/
+
+/// Root of the browse response body deserialized by `get_channel`.
+/// Only the fields needed to reach the video grid are declared.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct Channel {
+    contents: Contents,
+}
+
+/// Value of the response's `contents` key.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct Contents {
+    two_column_browse_results_renderer: TabsRenderer,
+}
+
+/// The `twoColumnBrowseResultsRenderer` object holding the list of tabs.
+#[serde_as]
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct TabsRenderer {
+    /// Tabs that fail to deserialize are skipped instead of failing the
+    /// whole response.
+    #[serde_as(as = "VecSkipError<_>")]
+    tabs: Vec<TabRendererWrap>,
+}
+
+/// Element of the tab list: an object with a `tabRenderer` key.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct TabRendererWrap {
+    tab_renderer: TabRenderer,
+}
+
+/// A single tab; only its `content` is read.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct TabRenderer {
+    content: SectionListRendererWrap,
+}
+
+/// Tab content: an object with a `sectionListRenderer` key.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct SectionListRendererWrap {
+    section_list_renderer: SectionListRenderer,
+}
+
+/// The `sectionListRenderer` object: a list of item sections.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct SectionListRenderer {
+    contents: Vec<ItemSectionRendererWrap>,
+}
+
+/// Element of the section list: an object with an `itemSectionRenderer` key.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct ItemSectionRendererWrap {
+    item_section_renderer: ItemSectionRenderer,
+}
+
+/// The `itemSectionRenderer` object: a list of grids.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct ItemSectionRenderer {
+    contents: Vec<GridRendererWrap>,
+}
+
+/// Element of an item section: an object with a `gridRenderer` key.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct GridRendererWrap {
+    grid_renderer: GridRenderer,
+}
+
+/// The `gridRenderer` object holding the video items.
+#[serde_as]
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct GridRenderer {
+    /// Items that fail to deserialize (e.g. entries without a
+    /// `gridVideoRenderer`) are skipped.
+    #[serde_as(as = "VecSkipError<_>")]
+    items: Vec<VideoListItem>,
+}
+
+/// Element of the grid: an object with a `gridVideoRenderer` key.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct VideoListItem {
+    grid_video_renderer: GridVideoRenderer,
+}
+
+/// A video in the grid; only the two view count texts are read.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct GridVideoRenderer {
+    /// Exact view count, e.g. `24,194 views`
+    view_count_text: Text,
+    /// Abbreviated view count, e.g. `19K views`
+    short_view_count_text: Text,
+}
+
+/// View counts extracted from a channel response by `get_channel`.
+#[derive(Clone, Debug)]
+struct ChannelData {
+    /// One entry per video: (exact view count, abbreviated view count text)
+    view_counts: Vec<(u64, String)>,
+}
+
+/// Request a channel's video grid from the YouTube browse endpoint in the
+/// given language and extract the view counts of the listed videos.
+///
+/// Each returned entry consists of the exact view count (parsed from the
+/// digits of `viewCountText`) and the abbreviated `shortViewCountText`.
+/// If the response contains no tab, an empty list is returned.
+///
+/// # Errors
+///
+/// Returns an error if the request fails, the server answers with an error
+/// status, the body cannot be deserialized, the first tab contains no video
+/// grid or a view count text contains no parseable number.
+async fn get_channel(channel_id: &str, lang: Language) -> Result<ChannelData> {
+    let client = Client::new();
+
+    let body = format!(
+        "{}{}{}{}{}",
+        r##"{"context":{"client":{"clientName":"WEB","clientVersion":"2.20220914.06.00","platform":"DESKTOP","originalUrl":"https://www.youtube.com/","hl":""##,
+        lang,
+        r##"","gl":"US"},"request":{"internalExperimentFlags":[],"useSsl":true},"user":{"lockedSafetyMode":false}},"params":"EgZ2aWRlb3MYASAAMAE%3D","browseId":""##,
+        channel_id,
+        "\"}"
+    );
+
+    let resp = client
+        .post("https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false")
+        .header(header::CONTENT_TYPE, "application/json")
+        .body(body)
+        .send()
+        .await?
+        .error_for_status()?;
+
+    let channel = resp.json::<Channel>().await?;
+
+    let tab = match channel
+        .contents
+        .two_column_browse_results_renderer
+        .tabs
+        .first()
+    {
+        Some(tab) => tab,
+        // No (deserializable) tab: report an empty result
+        None => {
+            return Ok(ChannelData {
+                view_counts: Vec::new(),
+            })
+        }
+    };
+
+    // The video grid is the first element of the first item section.
+    // Report a missing grid as an error instead of panicking on an index.
+    let grid = tab
+        .tab_renderer
+        .content
+        .section_list_renderer
+        .contents
+        .first()
+        .and_then(|section| section.item_section_renderer.contents.first())
+        .map(|wrap| &wrap.grid_renderer)
+        .with_context(|| format!("no video grid in response for channel {}", channel_id))?;
+
+    let view_counts = grid
+        .items
+        .iter()
+        .map(|itm| -> Result<(u64, String)> {
+            let video = &itm.grid_video_renderer;
+            let exact = util::parse_numeric::<u64>(&video.view_count_text.simple_text)
+                .with_context(|| {
+                    format!(
+                        "invalid view count text `{}`",
+                        video.view_count_text.simple_text
+                    )
+                })?;
+            Ok((exact, video.short_view_count_text.simple_text.to_owned()))
+        })
+        .collect::<Result<Vec<_>>>()?;
+
+    Ok(ChannelData { view_counts })
+}
+
+/// Manual smoke test for `get_channel`: sends a real request to YouTube for
+/// one channel with `Language::Az` and prints the extracted data.
+/// Requires network access; nothing is asserted beyond the request succeeding.
+#[tokio::test]
+async fn test() {
+    let channel = get_channel("UCcdwLMPsaU2ezNSJU1nFoBQ", Language::Az)
+        .await
+        .unwrap();
+
+    // Output is meant for manual inspection (`cargo test -- --nocapture`)
+    dbg!(channel);
+}
+
+/// Run `write_samples_to_dict` against the checked-out project.
+///
+/// The project root is derived from the location of the codegen crate
+/// instead of an absolute path that only exists on one machine.
+/// Note that this rewrites the dictionary file of the project.
+#[test]
+fn test2() {
+    // CARGO_MANIFEST_DIR points to `<project root>/codegen`
+    let project_root = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .expect("codegen crate should be located inside the project root");
+    write_samples_to_dict(project_root);
+}
--- a/codegen/src/collect_playlist_dates.rs
+++ b/codegen/src/collect_playlist_dates.rs
@ -38,7 +38,7 @@ enum DateCase {
 }

 /// Collect 'Playlist updated' dates in every supported language
-/// and write them to `testfiles/date/playlist_samples.json`.
+/// and write them to `testfiles/dict/playlist_samples.json`.
 ///
 /// YouTube's API outputs the update date of playlists only in a
 /// textual format (e.g. *Last updated on Jan 3, 2020*), which varies
@ -55,13 +55,15 @@ enum DateCase {
 /// - one playlist updated yesterday
 /// - one playlist updated 2-7 days ago
 /// - one playlist from every month. Note that there should not
-/// be any dates which include the same number twice (e.g. 01.01.2020).
+///   be any dates which include the same number twice (e.g. 01.01.2020).
+///
+/// **IMPORTANT:**
 ///
 /// Because the relative dates change with time, the first three playlists
-/// should be checked and eventually changed before running the program.
+/// have to be checked and, if necessary, changed before running the program.
 pub async fn collect_dates(project_root: &Path, concurrency: usize) {
    let mut json_path = project_root.to_path_buf();
-    json_path.push("testfiles/date/playlist_samples.json");
+    json_path.push("testfiles/dict/playlist_samples.json");

    // These are the sample playlists
    let cases = [
@ -115,7 +117,7 @@ pub async fn collect_dates(project_root: &Path, concurrency: usize) {
 /// parsed automatically and require manual work.
 pub fn write_samples_to_dict(project_root: &Path) {
    let mut json_path = project_root.to_path_buf();
-    json_path.push("testfiles/date/playlist_samples.json");
+    json_path.push("testfiles/dict/playlist_samples.json");

    let json_file = File::open(json_path).unwrap();
    let collected_dates: CollectedDates =
--- a/codegen/src/download_testfiles.rs
+++ b/codegen/src/download_testfiles.rs
@ -146,7 +146,7 @@ async fn video_details(testfiles: &Path) {
 async fn comments_top(testfiles: &Path) {
    let mut json_path = testfiles.to_path_buf();
    json_path.push("video_details");
-    json_path.push(format!("comments_top.json"));
+    json_path.push("comments_top.json");
    if json_path.exists() {
        return;
    }
--- a/codegen/src/gen_dictionary.rs
+++ b/codegen/src/gen_dictionary.rs
@ -34,17 +34,47 @@ pub fn generate_dictionary(project_root: &Path) {
    let dict = util::read_dict(project_root);

    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
+// See codegen/gen_dictionary.rs for the generation code.
 use crate::{
    model::Language,
    timeago::{DateCmp, TaToken, TimeUnit},
 };

+/// The dictionary contains the information required to parse dates and numbers
+/// in all supported languages.
 pub struct Entry {
+    /// Should the language be parsed by character instead of by word?
+    /// (e.g. Chinese/Japanese)
    pub by_char: bool,
+    /// Tokens for parsing timeago strings.
+    ///
+    /// Format: Parsed token -> \[Quantity\] Identifier
+    ///
+    /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
+    /// `h`(our), `m`(inute), `s`(econd)
    pub timeago_tokens: phf::Map<&'static str, TaToken>,
+    /// Order in which to parse numeric date components. Formatted as
+    /// a string of date identifiers (Y, M, D).
+    ///
+    /// Examples:
+    ///
+    /// - 03.01.2020 => `"DMY"`
+    /// - Jan 3, 2020 => `"DY"`
    pub date_order: &'static [DateCmp],
+    /// Tokens for parsing month names.
+    ///
+    /// Format: Parsed token -> Month number (starting from 1)
    pub months: phf::Map<&'static str, u8>,
+    /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
+    ///
+    /// Format: Parsed token -> \[Quantity\] Identifier
    pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
+    /// Are commas (instead of points) used as decimal separators?
+    pub comma_decimal: bool,
+    /// Tokens for parsing decimal prefixes (K, M, B, ...)
+    ///
+    /// Format: Parsed token -> decimal power
+    pub number_tokens: phf::Map<&'static str, u8>,
 }
 "#;

@ -100,12 +130,19 @@ pub fn entry(lang: Language) -> Entry {
        });
        date_order = date_order.trim_end_matches([' ', ',']).to_owned() + "]";

+        // Number tokens
+        let mut number_tokens = phf_codegen::Map::<&str>::new();
+        entry.number_tokens.iter().for_each(|(txt, mag)| {
+            number_tokens.entry(txt, &mag.to_string());
+        });
+
        let code_ta_tokens = &ta_tokens.build().to_string().replace('\n', "\n            ");
        let code_ta_nd_tokens = &ta_nd_tokens.build().to_string().replace('\n', "\n            ");
        let code_months = &months.build().to_string().replace('\n', "\n            ");
+        let code_number_tokens = &number_tokens.build().to_string().replace('\n', "\n            ");

-        let _ = write!(code_timeago_tokens, "{} => Entry {{\n            by_char: {:?},\n            timeago_tokens: {},\n            date_order: {},\n            months: {},\n            timeago_nd_tokens: {},\n        }},\n        ",
-        selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens);
+        let _ = write!(code_timeago_tokens, "{} => Entry {{\n            by_char: {:?},\n            timeago_tokens: {},\n            date_order: {},\n            months: {},\n            timeago_nd_tokens: {},\n            comma_decimal: {:?},\n            number_tokens: {},\n        }},\n        ",
+        selector, entry.by_char, code_ta_tokens, date_order, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens);
    });

    code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n    }\n}\n";
--- a/codegen/src/gen_locales.rs
+++ b/codegen/src/gen_locales.rs
@ -8,6 +8,8 @@ use serde::Deserialize;
 use serde_with::serde_as;
 use serde_with::VecSkipError;

+use crate::util::Text;
+
 #[serde_as]
 #[derive(Clone, Debug, Deserialize)]
 #[serde(rename_all = "camelCase")]
@ -135,12 +137,6 @@ struct LanguageCountryCommand {
    hl: String,
 }

-#[derive(Clone, Debug, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct Text {
-    simple_text: String,
-}
-
 pub async fn generate_locales(project_root: &Path) {
    let (languages, countries) = get_locales().await;

@ -284,7 +280,7 @@ pub enum Country {
 async fn get_locales() -> (BTreeMap<String, String>, BTreeMap<String, String>) {
    let client = Client::new();
    let resp = client
-        .post("https://www.youtube.com/youtubei/v1/account/account_menu?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8")
+        .post("https://www.youtube.com/youtubei/v1/account/account_menu?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false")
        .header(header::CONTENT_TYPE, "application/json")
        .body(
            r##"{"context":{"client":{"clientName":"WEB","clientVersion":"2.20220914.06.00","platform":"DESKTOP","originalUrl":"https://www.youtube.com/","hl":"en","gl":"US"},"request":{"internalExperimentFlags":[],"useSsl":true},"user":{"lockedSafetyMode":false}}}"##
--- a/codegen/src/main.rs
+++ b/codegen/src/main.rs
@ -1,3 +1,4 @@
+mod collect_large_numbers;
 mod collect_playlist_dates;
 mod download_testfiles;
 mod gen_dictionary;
@ -21,7 +22,9 @@ struct Cli {
 #[derive(Subcommand)]
 enum Commands {
    CollectPlaylistDates,
-    WritePlaylistDates,
+    CollectLargeNumbers,
+    ParsePlaylistDates,
+    ParseLargeNumbers,
    GenLocales,
    GenDict,
    DownloadTestfiles,
@ -36,8 +39,14 @@ async fn main() {
        Commands::CollectPlaylistDates => {
            collect_playlist_dates::collect_dates(&cli.project_root, cli.concurrency).await;
        }
-        Commands::WritePlaylistDates => {
-            collect_playlist_dates::write_samples_to_dict(&cli.project_root);
+        Commands::CollectLargeNumbers => {
+            collect_large_numbers::collect_large_numbers(&cli.project_root, cli.concurrency).await;
+        }
+        Commands::ParsePlaylistDates => {
+            collect_playlist_dates::write_samples_to_dict(&cli.project_root)
+        }
+        Commands::ParseLargeNumbers => {
+            collect_large_numbers::write_samples_to_dict(&cli.project_root)
        }
        Commands::GenLocales => {
            gen_locales::generate_locales(&cli.project_root).await;
--- a/codegen/src/util.rs
+++ b/codegen/src/util.rs
@ -3,19 +3,53 @@ use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path, str::FromS
 use rustypipe::model::Language;
 use serde::{Deserialize, Serialize};

-const DICT_PATH: &str = "testfiles/date/dictionary.json";
+const DICT_PATH: &str = "testfiles/dict/dictionary.json";

 type Dictionary = BTreeMap<Language, DictEntry>;

+/// Dictionary entry of a single language (the value type of `Dictionary`).
+///
+/// Fields missing from the serialized entry are filled with their default
+/// values when deserializing (`#[serde(default)]`).
 #[derive(Debug, Default, Serialize, Deserialize)]
 #[serde(default)]
 pub struct DictEntry {
+    /// List of languages that should be treated equally (e.g. EnUs/EnGb/EnIn)
    pub equivalent: Vec<Language>,
+    /// Should the language be parsed by character instead of by word?
+    /// (e.g. Chinese/Japanese)
    pub by_char: bool,
+    /// Tokens for parsing timeago strings.
+    ///
+    /// Format: Parsed token -> \[Quantity\] Identifier
+    ///
+    /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
+    /// `h`(our), `m`(inute), `s`(econd)
    pub timeago_tokens: BTreeMap<String, String>,
+    /// Order in which to parse numeric date components. Formatted as
+    /// a string of date identifiers (Y, M, D).
+    ///
+    /// Examples:
+    ///
+    /// - 03.01.2020 => `"DMY"`
+    /// - Jan 3, 2020 => `"DY"`
    pub date_order: String,
+    /// Tokens for parsing month names.
+    ///
+    /// Format: Parsed token -> Month number (starting from 1)
    pub months: BTreeMap<String, u8>,
+    /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
+    ///
+    /// Format: Parsed token -> \[Quantity\] Identifier
    pub timeago_nd_tokens: BTreeMap<String, String>,
+    /// Are commas (instead of points) used as decimal separators?
+    pub comma_decimal: bool,
+    /// Tokens for parsing decimal prefixes (K, M, B, ...)
+    ///
+    /// Format: Parsed token -> decimal power
+    pub number_tokens: BTreeMap<String, u8>,
+}
+
+/// Text object of a YouTube response: an object with a `simpleText` string.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Text {
+    pub simple_text: String,
 }

 pub fn read_dict(project_root: &Path) -> Dictionary {
@ -48,6 +82,27 @@ pub fn filter_datestr(string: &str) -> String {
        .collect()
 }

+/// Remove all ASCII digits, `.`, `,` and zero-width spaces (U+200B) from a
+/// string, keeping every other character in its original order.
+pub fn filter_largenumstr(string: &str) -> String {
+    let mut filtered = String::new();
+    for c in string.chars() {
+        let is_separator = c == '\u{200b}' || c == '.' || c == ',';
+        if !is_separator && !c.is_ascii_digit() {
+            filtered.push(c);
+        }
+    }
+    filtered
+}
+
+/// Parse the ASCII digits of a string, ignoring every other character.
+///
+/// The digits are concatenated in order and handed to `F::from_str`, whose
+/// error is returned unchanged (e.g. if the string contains no digits).
+pub fn parse_numeric<F>(string: &str) -> Result<F, F::Err>
+where
+    F: FromStr,
+{
+    let digits: String = string.chars().filter(char::is_ascii_digit).collect();
+    digits.parse()
+}
+
 /// Parse all numbers occurring in a string and reurn them as a vec
 pub fn parse_numeric_vec<F>(string: &str) -> Vec<F>
 where