fix: a/b test 12: parsing new channel page header

This commit is contained in:
ThetaDev 2024-01-29 17:09:42 +01:00
parent e5b8a9a9b0
commit 5275170f9a
No known key found for this signature in database
GPG key ID: 649CA4EBDC338394
8 changed files with 10925 additions and 12 deletions

View file

@ -9,6 +9,7 @@ use regex::Regex;
use rustypipe::client::{ClientType, RustyPipe, RustyPipeQuery, YTContext};
use rustypipe::model::{MusicItem, YouTubeItem};
use rustypipe::param::search_filter::{ItemType, SearchFilter};
use rustypipe::param::ChannelVideoTab;
use serde::de::IgnoredAny;
use serde::{Deserialize, Serialize};
@ -28,6 +29,7 @@ pub enum ABTest {
PlaylistsForShorts = 9,
ChannelAboutModal = 10,
LikeButtonViewmodel = 11,
ChannelPageHeader = 12,
}
const TESTS_TO_RUN: [ABTest; 3] = [
@ -102,6 +104,7 @@ pub async fn run_test(
ABTest::TrackViewcount => track_viewcount(&query).await,
ABTest::ChannelAboutModal => channel_about_modal(&query).await,
ABTest::LikeButtonViewmodel => like_button_viewmodel(&query).await,
ABTest::ChannelPageHeader => channel_page_header(&query).await,
}
.unwrap();
pb.inc(1);
@ -330,3 +333,10 @@ pub async fn like_button_viewmodel(rp: &RustyPipeQuery) -> Result<bool> {
.await?;
Ok(res.contains("\"segmentedLikeDislikeButtonViewModel\""))
}
pub async fn channel_page_header(rp: &RustyPipeQuery) -> Result<bool> {
let channel = rp
.channel_videos_tab("UCh8gHdtzO2tXd593_bjErWg", ChannelVideoTab::Shorts)
.await?;
Ok(channel.mobile_banner.is_empty() && channel.tv_banner.is_empty())
}

View file

@ -398,8 +398,8 @@ a short format (_21h ago_, _3d ago_).
- **Impact:** 🟡 Medium
- **Status:** Experimental (9%)
YouTube added the track playback count to search results and top artist tracks.
In exchange, they removed the "Song" type identifier from search results.
YouTube added the track playback count to search results and top artist tracks. In
exchange, they removed the "Song" type identifier from search results.
![A/B test 8 old screenshot](./_img/ab_8_old.png)
@ -436,7 +436,7 @@ by using the mobile client. But that may change in the future.
![A/B test 10 screenshot](./_img/ab_10.png)
YouTube replaced the *About* channel tab with a modal. This changes the way additional
YouTube replaced the _About_ channel tab with a modal. This changes the way additional
channel metadata has to be fetched.
The new modal uses a continuation request with a token which can be easily generated.
@ -450,7 +450,7 @@ the main tab.
- **Endpoint:** next
- **Status:** Experimental (7%)
YouTube introduced an updated date model for the like/dislike buttons. The new model
YouTube introduced an updated data model for the like/dislike buttons. The new model
looks needlessly complex but contains the same parsing-relevant data as the old model
(accessibility text to get like count).
@ -475,3 +475,112 @@ looks needlessly complex but contains the same parsing-relevant data as the old
}
}
```
## [12] New channel page header
- **Encountered on:** 29.01.2024
- **Impact:** 🟢 Low
- **Endpoint:** browse
- **Status:** Experimental (<1%)
YouTube introduced a new data model for channel headers, based on a
`"pageHeaderRenderer"`. The new model comes with more needless complexity that needs to
be accomodated. There are also no mobile/TV header images available any more.
```json
{
"pageHeaderViewModel": {
"title": {
"dynamicTextViewModel": {
"text": {
"content": "Doobydobap",
"attachmentRuns": [
{
"startIndex": 10,
"length": 0,
"element": {
"type": {
"imageType": {
"image": {
"sources": [
{
"clientResource": {
"imageName": "CHECK_CIRCLE_FILLED"
},
"width": 14,
"height": 14
}
]
}
}
}
}
}
]
}
}
},
"image": {
"decoratedAvatarViewModel": {
"avatar": {
"avatarViewModel": {
"image": {
"sources": [
{
"url": "https://yt3.googleusercontent.com/dm5Aq93xvVJz0NoVO88ieBkDXmuShCujGPlZ7qETMEPTrXvPUCFI3-BB6Xs_P-r6Uk3mnBy9zA=s72-c-k-c0x00ffffff-no-rj",
"width": 72,
"height": 72
}
]
}
}
}
}
},
"metadata": {
"contentMetadataViewModel": {
"metadataRows": [
{
"metadataParts": [
{
"text": {
"content": "@Doobydobap"
}
},
{
"text": {
"content": "3.74M subscribers"
}
},
{
"text": {
"content": "345 videos",
"styleRuns": [
{
"startIndex": 0,
"length": 10
}
]
}
}
]
}
]
}
},
"banner": {
"imageBannerViewModel": {
"image": {
"sources": [
{
"url": "https://yt3.googleusercontent.com/BvnAqgiursrXpmS9AgDLtkOSTQfOG_Dqn0KzY5hcwO9XrHTEQTVgaflI913f9KRp7d0U2qBp=w1060-fcrop64=1,00005a57ffffa5a8-k-c0xffffffff-no-nd-rj",
"width": 1060,
"height": 175
}
]
}
}
}
}
}
```

View file

@ -462,6 +462,8 @@ fn map_channel(
})
}),
avatar: hdata.map(|hdata| hdata.1.into()).unwrap_or_default(),
// Since the carousel header is only used for YT-internal channels or special events
// (World Cup, Coachella, etc.) we can assume the channel to be verified
verification: crate::model::Verification::Verified,
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
@ -475,6 +477,43 @@ fn map_channel(
content: (),
}
}
response::channel::Header::PageHeaderRenderer(header) => {
let hdata = header.content.page_header_view_model;
// channel handle - subscriber count - video count
let subscriber_count = hdata
.metadata
.content_metadata_view_model
.metadata_rows
.first()
.and_then(|md| {
md.metadata_parts.get(1).and_then(|t| {
util::parse_large_numstr_or_warn::<u64>(&t.text, lang, &mut warnings)
})
});
Channel {
id: metadata.external_id,
name: metadata.title,
subscriber_count,
avatar: hdata
.image
.decorated_avatar_view_model
.avatar
.avatar_view_model
.image
.into(),
verification: hdata.title.into(),
description: metadata.description,
tags: microformat.microformat_data_renderer.tags,
vanity_url,
banner: hdata.banner.image_banner_view_model.image.into(),
mobile_banner: Vec::new(),
tv_banner: Vec::new(),
has_shorts: d.has_shorts,
has_live: d.has_live,
visitor_data: d.visitor_data,
content: (),
}
}
},
warnings,
})
@ -680,6 +719,7 @@ mod tests {
#[case::coachella("videos_20230415_coachella", "UCHF66aWLOxBW4l6VkSrS3cQ")]
#[case::shorts("shorts", "UCh8gHdtzO2tXd593_bjErWg")]
#[case::livestreams("livestreams", "UC2DjFE7Xf11URZqWBigcVOQ")]
#[case::pageheader("shorts_20240129_pageheader", "UCh8gHdtzO2tXd593_bjErWg")]
fn map_channel_videos(#[case] name: &str, #[case] id: &str) {
let json_path = path!(*TESTFILES / "channel" / format!("channel_{name}.json"));
let json_file = File::open(json_path).unwrap();

View file

@ -2,8 +2,8 @@ use serde::Deserialize;
use serde_with::{rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError};
use super::{
video_item::YouTubeListRenderer, Alert, ChannelBadge, ContentsRenderer, ContinuationActionWrap,
ResponseContext, Thumbnails, TwoColumnBrowseResults,
video_item::YouTubeListRenderer, Alert, ChannelBadge, ContentRenderer, ContentsRenderer,
ContinuationActionWrap, ResponseContext, Thumbnails, TwoColumnBrowseResults,
};
use crate::serializer::text::{AttributedText, Text, TextComponent};
@ -71,10 +71,12 @@ pub(crate) struct ChannelTabWebCommandMetadata {
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
#[allow(clippy::enum_variant_names)]
pub(crate) enum Header {
C4TabbedHeaderRenderer(HeaderRenderer),
/// Used for special channels like YouTube Music
CarouselHeaderRenderer(ContentsRenderer<CarouselHeaderRendererItem>),
PageHeaderRenderer(ContentRenderer<PageHeaderRenderer>),
}
#[serde_as]
@ -117,6 +119,149 @@ pub(crate) enum CarouselHeaderRendererItem {
None,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PageHeaderRenderer {
pub page_header_view_model: PageHeaderRendererInner,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PageHeaderRendererInner {
pub title: PhTitleView,
pub image: PhAvatarView,
pub metadata: PhMetadataView,
pub banner: PhBannerView,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhTitleView {
pub dynamic_text_view_model: PhTitleView2,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhTitleView2 {
pub text: PhTitleView3,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhTitleView3 {
#[serde_as(as = "VecSkipError<_>")]
pub attachment_runs: Vec<AttachmentRun>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRun {
pub element: AttachmentRunElement,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElement {
#[serde(rename = "type")]
pub typ: AttachmentRunElementType,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementType {
pub image_type: AttachmentRunElementImageType,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImageType {
pub image: AttachmentRunElementImage,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImage {
#[serde_as(as = "VecSkipError<_>")]
pub sources: Vec<AttachmentRunElementImageSource>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct AttachmentRunElementImageSource {
pub client_resource: ClientResource,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ClientResource {
pub image_name: IconName,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub(crate) enum IconName {
CheckCircleFilled,
MusicFilled,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhAvatarView {
pub decorated_avatar_view_model: PhAvatarView2,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhAvatarView2 {
pub avatar: PhAvatarView3,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhAvatarView3 {
pub avatar_view_model: ImageView,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ImageView {
pub image: Thumbnails,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhMetadataView {
pub content_metadata_view_model: PhMetadataView2,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhMetadataView2 {
pub metadata_rows: Vec<PhMetadataRow>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhMetadataRow {
pub metadata_parts: Vec<TextWrap>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PhBannerView {
pub image_banner_view_model: ImageView,
}
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct TextWrap {
#[serde_as(deserialize_as = "Text")]
pub text: String,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Metadata {
@ -214,3 +359,22 @@ pub(crate) struct ExternalLinkInner {
#[serde_as(as = "AttributedText")]
pub link: TextComponent,
}
impl From<PhTitleView> for crate::model::Verification {
fn from(value: PhTitleView) -> Self {
value
.dynamic_text_view_model
.text
.attachment_runs
.iter()
.find_map(|r| {
r.element.typ.image_type.image.sources.first().map(|s| {
match s.client_resource.image_name {
IconName::CheckCircleFilled => crate::model::Verification::Verified,
IconName::MusicFilled => crate::model::Verification::Artist,
}
})
})
.unwrap_or_default()
}
}

View file

@ -108,7 +108,7 @@ pub(crate) struct ThumbnailsWrap {
#[derive(Default, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Thumbnails {
#[serde(default)]
#[serde(default, alias = "sources")]
pub thumbnails: Vec<Thumbnail>,
}

File diff suppressed because it is too large Load diff

View file

@ -1175,13 +1175,23 @@ mod channel_rss {
#[rstest]
fn get_channel_rss_empty(rp: RustyPipe) {
let channel =
tokio_test::block_on(rp.query().channel_rss("UCAyFbMjB3qAQSZBj6NCuBSg")).unwrap();
let channel = tokio_test::block_on(rp.query().channel_rss("UCAyFbMjB3qAQSZBj6NCuBSg"));
assert_eq!(channel.id, "UCAyFbMjB3qAQSZBj6NCuBSg");
assert_eq!(channel.name, "Cheryl Calogero");
match channel {
Ok(channel) => {
assert_eq!(channel.id, "UCAyFbMjB3qAQSZBj6NCuBSg");
assert_eq!(channel.name, "Cheryl Calogero");
assert!(channel.videos.is_empty());
assert!(channel.videos.is_empty());
}
Err(err) => {
assert!(
matches!(err, Error::Extraction(ExtractionError::NotFound { .. })),
"got: {}",
err
);
}
}
}
#[rstest]